Index: include/llvm/Transforms/Vectorize/SLPVectorizer.h =================================================================== --- include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -59,8 +59,8 @@ struct SLPVectorizerPass : public PassInfoMixin { using StoreList = SmallVector; using StoreListMap = MapVector; - using WeakTrackingVHList = SmallVector; - using WeakTrackingVHListMap = MapVector; + using GEPList = SmallVector; + using GEPListMap = MapVector; ScalarEvolution *SE = nullptr; TargetTransformInfo *TTI = nullptr; @@ -131,7 +131,7 @@ /// Tries to vectorize constructs started from CmpInst, InsertValueInst or /// InsertElementInst instructions. - bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, + bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, BasicBlock *BB, slpvectorizer::BoUpSLP &R); /// \brief Scan the basic block and look for patterns that are likely to start @@ -147,7 +147,7 @@ StoreListMap Stores; /// The getelementptr instructions in a basic block organized by base pointer. - WeakTrackingVHListMap GEPs; + GEPListMap GEPs; }; } // end namespace llvm Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -627,6 +627,14 @@ OptimizationRemarkEmitter *getORE() { return ORE; } + /// Checks if the instruction is marked for deletion. + bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); } + + /// Marks values for later deletion. + void eraseInstructions(ArrayRef AV); + + ~BoUpSLP(); + private: struct TreeEntry; @@ -814,14 +822,12 @@ /// AliasCache, which can happen if a new instruction is allocated at the /// same address as a previously deleted instruction. void eraseInstruction(Instruction *I) { - I->removeFromParent(); - I->dropAllReferences(); - DeletedInstructions.emplace_back(I); + DeletedInstructions.insert(I); } /// Temporary store for deleted instructions. Instructions will be deleted /// eventually when the BoUpSLP is destructed. - SmallVector DeletedInstructions; + SmallPtrSet DeletedInstructions; /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User @@ -1303,6 +1309,24 @@ } // end namespace llvm +BoUpSLP::~BoUpSLP() { + std::for_each(DeletedInstructions.begin(), DeletedInstructions.end(), + [](Instruction *I) { I->dropAllReferences(); }); + std::for_each(DeletedInstructions.begin(), DeletedInstructions.end(), + [](Instruction *I) { + assert(I->use_empty() && + "trying to erase instruction with users."); + I->eraseFromParent(); + }); +} + +void BoUpSLP::eraseInstructions(ArrayRef AV) { + std::for_each(AV.begin(), AV.end(), [this](Value *V) { + if (auto *I = dyn_cast(V)) + eraseInstruction(I); + }); +} + void BoUpSLP::buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst) { ExtraValueToDebugLocsMap ExternallyUsedValues; @@ -2688,7 +2712,7 @@ // Generate the 'InsertElement' instruction. 
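(Illustrative aside, not part of the patch.) The hunks above replace eager erasure with deferred deletion: eraseInstruction() now only records the instruction in a set, isDeleted() lets later code skip dead instructions, and the new ~BoUpSLP() destructor frees everything in two passes (drop all references first, then erase), so mutual uses among doomed instructions never trip the use-count assertion. Below is a minimal standalone C++ model of that scheme; Node, DeferredDeleter, markForDeletion and NumUses are made-up stand-ins for Instruction, BoUpSLP, eraseInstruction and the use list, not the patch's real types.

#include <cassert>
#include <set>
#include <vector>

struct Node {
  std::vector<Node *> Operands; // outgoing references ("uses" of other nodes)
  int NumUses = 0;              // how many nodes still reference this one
};

class DeferredDeleter {
  std::set<Node *> Doomed; // plays the role of BoUpSLP::DeletedInstructions

public:
  // Like the new eraseInstruction(): only mark, do not touch the graph yet.
  void markForDeletion(Node *N) { Doomed.insert(N); }

  // Like BoUpSLP::isDeleted(): lets later passes skip dead nodes.
  bool isDeleted(Node *N) const { return Doomed.count(N) != 0; }

  // Like ~BoUpSLP(): first drop references between doomed nodes, then free
  // them, so that references among dead nodes never dangle.
  ~DeferredDeleter() {
    for (Node *N : Doomed) {
      for (Node *Op : N->Operands)
        --Op->NumUses;
      N->Operands.clear();
    }
    for (Node *N : Doomed) {
      assert(N->NumUses == 0 && "trying to erase a node with users");
      delete N;
    }
  }
};
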
for (unsigned i = 0; i < Ty->getNumElements(); ++i) { Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); - if (Instruction *Insrt = dyn_cast(Vec)) { + if (auto *Insrt = dyn_cast(Vec)) { GatherSeq.insert(Insrt); CSEBlocks.insert(Insrt->getParent()); @@ -3277,20 +3301,18 @@ for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; +#ifndef NDEBUG Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { -#ifndef NDEBUG for (User *U : Scalar->users()) { DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); - // It is legal to replace users in the ignorelist by undef. + // It is legal to delete users in the ignorelist. assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) && - "Replacing out-of-tree value with undef"); + "Deleting out-of-tree value"); } -#endif - Value *Undef = UndefValue::get(Ty); - Scalar->replaceAllUsesWith(Undef); } +#endif DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); eraseInstruction(cast(Scalar)); } @@ -3305,10 +3327,8 @@ DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. - for (Instruction *it : GatherSeq) { - InsertElementInst *Insert = dyn_cast(it); - - if (!Insert) + for (auto *Insert : GatherSeq) { + if (isDeleted(Insert)) continue; // Check if this block is inside a loop. @@ -3362,6 +3382,8 @@ // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = &*it++; + if (isDeleted(In)) + continue; if (!isa(In) && !isa(In)) continue; @@ -4237,19 +4259,6 @@ return Changed; } -/// \brief Check that the Values in the slice in VL array are still existent in -/// the WeakTrackingVH array. -/// Vectorization of part of the VL array may cause later values in the VL array -/// to become invalid. We track when this has happened in the WeakTrackingVH -/// array. -static bool hasValueBeenRAUWed(ArrayRef VL, - ArrayRef VH, unsigned SliceBegin, - unsigned SliceSize) { - VL = VL.slice(SliceBegin, SliceSize); - VH = VH.slice(SliceBegin, SliceSize); - return !std::equal(VL.begin(), VL.end(), VH.begin()); -} - bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, unsigned VecRegSize) { unsigned ChainLen = Chain.size(); @@ -4261,22 +4270,21 @@ if (!isPowerOf2_32(Sz) || VF < 2) return false; - // Keep track of values that were deleted by vectorizing in the loop below. - SmallVector TrackValues(Chain.begin(), Chain.end()); - bool Changed = false; // Look for profitable vectorizable trees at all offsets, starting at zero. for (unsigned i = 0, e = ChainLen; i < e; ++i) { if (i + VF > e) break; + ArrayRef Operands = Chain.slice(i, VF); // Check that a previous iteration of this loop did not delete the Value. - if (hasValueBeenRAUWed(Chain, TrackValues, i, VF)) + if (llvm::any_of(Operands, [&R](Value *V) { + auto *I = dyn_cast(V); + return I && R.isDeleted(I); + })) continue; - DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i << "\n"); - ArrayRef Operands = Chain.slice(i, VF); R.buildTree(Operands); if (R.isTreeTinyAndNotFullyVectorizable()) @@ -4454,9 +4462,6 @@ bool Changed = false; - // Keep track of values that were deleted by vectorizing in the loop below. 
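(Illustrative aside, not part of the patch.) The vectorizeStoreChain hunk above swaps the old WeakTrackingVH tracking for a direct query: before analyzing a slice of the chain, it skips the slice if any element was already marked deleted by an earlier, overlapping vectorization. A standalone sketch of that check, with ValueId and the container types as simplified stand-ins for llvm::Value * and ArrayRef:

#include <algorithm>
#include <cstddef>
#include <unordered_set>
#include <vector>

using ValueId = int; // stand-in for llvm::Value *

// True if any element of Chain[Begin, Begin + Size) was already erased by an
// earlier vectorization attempt. Callers are expected to guarantee that
// Begin + Size does not run past the end of the chain.
bool sliceHasDeletedValue(const std::vector<ValueId> &Chain, std::size_t Begin,
                          std::size_t Size,
                          const std::unordered_set<ValueId> &Deleted) {
  auto First = Chain.begin() + Begin;
  return std::any_of(First, First + Size,
                     [&Deleted](ValueId V) { return Deleted.count(V) != 0; });
}
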
- SmallVector TrackValues(VL.begin(), VL.end()); - unsigned NextInst = 0, MaxInst = VL.size(); for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) { @@ -4477,13 +4482,16 @@ if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2) break; + ArrayRef Ops = VL.slice(I, OpsWidth); // Check that a previous iteration of this loop did not delete the Value. - if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth)) + if (llvm::any_of(Ops, [&R](Value *V) { + auto *I = dyn_cast(V); + return I && R.isDeleted(I); + })) continue; DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations " << "\n"); - ArrayRef Ops = VL.slice(I, OpsWidth); ArrayRef BuildVectorSlice; if (!BuildVector.empty()) @@ -4654,7 +4662,9 @@ /// *p = /// class HorizontalReduction { - SmallVector ReductionOps; + using ReductionOpsType = SmallVector; + using ReductionOpsListType = SmallVector; + ReductionOpsListType ReductionOps; SmallVector ReducedVals; // Use map vector to make stable output. MapVector ExtraArgs; @@ -4695,6 +4705,37 @@ (Kind == RK_UMin || Kind == RK_UMax))); } + /// Creates reduction operation with the current opcode. + Value *createOp(IRBuilder<> &Builder, const Twine &Name) const { + assert(isVectorizable() && + "Expected add|fadd or min/max reduction operation."); + Value *Cmp; + switch (Kind) { + case RK_Arithmetic: + return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS, + Name); + case RK_Min: + Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS) + : Builder.CreateFCmpOLT(LHS, RHS); + break; + case RK_Max: + Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS) + : Builder.CreateFCmpOGT(LHS, RHS); + break; + case RK_UMin: + assert(Opcode == Instruction::ICmp && "Expected integer types."); + Cmp = Builder.CreateICmpULT(LHS, RHS); + break; + case RK_UMax: + assert(Opcode == Instruction::ICmp && "Expected integer types."); + Cmp = Builder.CreateICmpUGT(LHS, RHS); + break; + case RK_None: + llvm_unreachable("Unknown reduction operation."); + } + return Builder.CreateSelect(Cmp, LHS, RHS, Name); + } + public: explicit OperationData() = default; @@ -4766,6 +4807,44 @@ llvm_unreachable("Reduction kind is not set"); } + void initReductionOps(ReductionOpsListType &ReductionOps) { + assert(Kind != RK_None && !!*this && LHS && RHS && + "Expected reduction operation."); + switch (Kind) { + case RK_Arithmetic: + ReductionOps.assign(1, ReductionOpsType()); + break; + case RK_Min: + case RK_UMin: + case RK_Max: + case RK_UMax: + ReductionOps.assign(2, ReductionOpsType()); + break; + case RK_None: + llvm_unreachable("Reduction kind is not set"); + } + } + /// Add all reduction operations for the reduction instruction \p I. + void addReductionOps(Instruction *I, ReductionOpsListType &ReductionOps) { + assert(Kind != RK_None && !!*this && LHS && RHS && + "Expected reduction operation."); + switch (Kind) { + case RK_Arithmetic: + ReductionOps[0].emplace_back(I); + break; + case RK_Min: + case RK_UMin: + case RK_Max: + case RK_UMax: + if (cast(I)->getCondition()->hasOneUse()) + ReductionOps[0].emplace_back(cast(I)->getCondition()); + ReductionOps[1].emplace_back(I); + break; + case RK_None: + llvm_unreachable("Reduction kind is not set"); + } + } + /// Checks if instruction is associative and can be vectorized. bool isAssociative(Instruction *I) const { assert(Kind != RK_None && *this && LHS && RHS && @@ -4834,36 +4913,57 @@ llvm_unreachable("Reduction kind is not set"); } - /// Creates reduction operation with the current opcode. 
- Value *createOp(IRBuilder<> &Builder, const Twine &Name = "") const { + /// Creates reduction operation with the current opcode with the IR flags + /// from \p ReductionOps. + Value *createOp(IRBuilder<> &Builder, const Twine &Name, + const ReductionOpsListType &ReductionOps) const { assert(isVectorizable() && "Expected add|fadd or min/max reduction operation."); - Value *Cmp; + auto *Op = createOp(Builder, Name); switch (Kind) { case RK_Arithmetic: - return Builder.CreateBinOp((Instruction::BinaryOps)Opcode, LHS, RHS, - Name); + propagateIRFlags(Op, ReductionOps[0]); + return Op; case RK_Min: - Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSLT(LHS, RHS) - : Builder.CreateFCmpOLT(LHS, RHS); - break; case RK_Max: - Cmp = Opcode == Instruction::ICmp ? Builder.CreateICmpSGT(LHS, RHS) - : Builder.CreateFCmpOGT(LHS, RHS); - break; case RK_UMin: - assert(Opcode == Instruction::ICmp && "Expected integer types."); - Cmp = Builder.CreateICmpULT(LHS, RHS); + case RK_UMax: + propagateIRFlags(cast(Op)->getCondition(), ReductionOps[0]); + propagateIRFlags(Op, ReductionOps[1]); + return Op; + break; + case RK_None: break; + } + llvm_unreachable("Unknown reduction operation."); + } + + /// Creates reduction operation with the current opcode with the IR flags + /// from \p I. + Value *createOp(IRBuilder<> &Builder, const Twine &Name, + Instruction *I) const { + assert(isVectorizable() && + "Expected add|fadd or min/max reduction operation."); + auto *Op = createOp(Builder, Name); + switch (Kind) { + case RK_Arithmetic: + propagateIRFlags(Op, I); + return Op; + case RK_Min: + case RK_Max: + case RK_UMin: case RK_UMax: - assert(Opcode == Instruction::ICmp && "Expected integer types."); - Cmp = Builder.CreateICmpUGT(LHS, RHS); + propagateIRFlags(cast(Op)->getCondition(), + cast(I)->getCondition()); + propagateIRFlags(Op, I); + return Op; break; case RK_None: - llvm_unreachable("Unknown reduction operation."); + break; } - return Builder.CreateSelect(Cmp, LHS, RHS, Name); + llvm_unreachable("Unknown reduction operation."); } + TargetTransformInfo::ReductionFlags getFlags() const { TargetTransformInfo::ReductionFlags Flags; Flags.NoNaN = NoNaN; @@ -5000,6 +5100,7 @@ SmallVector, 32> Stack; Stack.push_back(std::make_pair(B, ReductionData.getFirstOperandIndex())); const unsigned NUses = ReductionData.getRequiredNumberOfUses(); + ReductionData.initReductionOps(ReductionOps); while (!Stack.empty()) { Instruction *TreeN = Stack.back().first; unsigned EdgeToVist = Stack.back().second++; @@ -5025,7 +5126,7 @@ markExtraArg(Stack[Stack.size() - 2], TreeN); ExtraArgs.erase(TreeN); } else - ReductionOps.push_back(TreeN); + ReductionData.addReductionOps(TreeN, ReductionOps); } // Retract. Stack.pop_back(); @@ -5110,14 +5211,18 @@ BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues; // The same extra argument may be used several time, so log each attempt // to use it. 
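(Illustrative aside, not part of the patch.) The next hunk flattens the per-class reduction-op lists (one list for add/fadd, two lists for the compare and the select of min/max reductions) into a single ignore list handed to buildTree(). A minimal standalone model of that flattening step, using simplified stand-in types rather than the patch's real ones:

#include <vector>

using ValuePtr = const void *;                  // stand-in for llvm::Value *
using ReductionOpsType = std::vector<ValuePtr>; // one class of reduction ops
using ReductionOpsListType = std::vector<ReductionOpsType>;

// Concatenate the per-class lists into the flat list used as the set of
// users that may legally be deleted after vectorization.
std::vector<ValuePtr>
flattenReductionOps(const ReductionOpsListType &ReductionOps) {
  std::vector<ValuePtr> IgnoreList;
  for (const ReductionOpsType &Ops : ReductionOps)
    IgnoreList.insert(IgnoreList.end(), Ops.begin(), Ops.end());
  return IgnoreList;
}
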
+ SmallVector IgnoreList; + for (auto &V : ReductionOps) + IgnoreList.append(V.begin(), V.end()); for (auto &Pair : ExtraArgs) ExternallyUsedValues[Pair.second].push_back(Pair.first); while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) { auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth); - V.buildTree(VL, ExternallyUsedValues, ReductionOps); + + V.buildTree(VL, ExternallyUsedValues, IgnoreList); if (V.shouldReorder()) { SmallVector Reversed(VL.rbegin(), VL.rend()); - V.buildTree(Reversed, ExternallyUsedValues, ReductionOps); + V.buildTree(Reversed, ExternallyUsedValues, IgnoreList); } if (V.isTreeTinyAndNotFullyVectorizable()) break; @@ -5145,14 +5250,14 @@ // Emit a reduction. Value *ReducedSubTree = - emitReduction(VectorizedRoot, Builder, ReduxWidth, ReductionOps, TTI); + emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI); if (VectorizedTree) { Builder.SetCurrentDebugLocation(Loc); OperationData VectReductionData(ReductionData.getOpcode(), VectorizedTree, ReducedSubTree, ReductionData.getKind()); - VectorizedTree = VectReductionData.createOp(Builder, "op.rdx"); - propagateIRFlags(VectorizedTree, ReductionOps); + VectorizedTree = + VectReductionData.createOp(Builder, "op.rdx", ReductionOps); } else VectorizedTree = ReducedSubTree; i += ReduxWidth; @@ -5167,8 +5272,7 @@ OperationData VectReductionData(ReductionData.getOpcode(), VectorizedTree, I, ReductionData.getKind()); - VectorizedTree = VectReductionData.createOp(Builder); - propagateIRFlags(VectorizedTree, ReductionOps); + VectorizedTree = VectReductionData.createOp(Builder, "", ReductionOps); } for (auto &Pair : ExternallyUsedValues) { assert(!Pair.second.empty() && @@ -5179,12 +5283,14 @@ OperationData VectReductionData(ReductionData.getOpcode(), VectorizedTree, Pair.first, ReductionData.getKind()); - VectorizedTree = VectReductionData.createOp(Builder, "op.extra"); - propagateIRFlags(VectorizedTree, I); + VectorizedTree = VectReductionData.createOp(Builder, "op.extra", I); } } // Update users. ReductionRoot->replaceAllUsesWith(VectorizedTree); + // Mark all scalar reduction ops for deletion, they are replaced by the + // vector reductions. + V.eraseInstructions(IgnoreList); } return VectorizedTree != nullptr; } @@ -5264,8 +5370,7 @@ /// \brief Emit a horizontal reduction of the vectorized value. Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder, - unsigned ReduxWidth, ArrayRef RedOps, - const TargetTransformInfo *TTI) { + unsigned ReduxWidth, const TargetTransformInfo *TTI) { assert(VectorizedValue && "Need to have a vectorized tree node"); assert(isPowerOf2_32(ReduxWidth) && "We only handle power-of-two reductions for now"); @@ -5273,7 +5378,7 @@ if (!IsPairwiseReduction) return createSimpleTargetReduction( Builder, TTI, ReductionData.getOpcode(), VectorizedValue, - ReductionData.getFlags(), RedOps); + ReductionData.getFlags(), ReductionOps.back()); Value *TmpVec = VectorizedValue; for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { @@ -5289,8 +5394,7 @@ "rdx.shuf.r"); OperationData VectReductionData(ReductionData.getOpcode(), LeftShuf, RightShuf, ReductionData.getKind()); - TmpVec = VectReductionData.createOp(Builder, "op.rdx"); - propagateIRFlags(TmpVec, RedOps); + TmpVec = VectReductionData.createOp(Builder, "op.rdx", ReductionOps); } // The result is in the first element of the vector. @@ -5438,18 +5542,13 @@ // horizontal reduction. // Interrupt the process if the Root instruction itself was vectorized or all // sub-trees not higher that RecursionMaxDepth were analyzed/vectorized. 
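(Illustrative aside, not part of the patch.) The following hunk changes the horizontal-reduction search so the worklist holds plain Instruction pointers instead of WeakTrackingVH handles, which is safe because operands marked as deleted are simply never pushed. A standalone model of that traversal, with Inst, walkOperandTree, and the depth limit of 8 as made-up stand-ins for Instruction, the search loop, and RecursionMaxDepth:

#include <set>
#include <utility>
#include <vector>

struct Inst {
  std::vector<Inst *> Operands;
};

void walkOperandTree(Inst *Root, const std::set<Inst *> &Deleted) {
  std::vector<std::pair<Inst *, unsigned>> Stack;
  std::set<Inst *> Visited;
  Visited.insert(Root);
  Stack.emplace_back(Root, 0u);
  while (!Stack.empty()) {
    Inst *I = Stack.back().first;
    unsigned Level = Stack.back().second;
    Stack.pop_back();
    if (Level > 8) // rough stand-in for RecursionMaxDepth
      continue;
    // ... try to vectorize starting at I here ...
    for (Inst *Op : I->Operands)
      if (Visited.insert(Op).second && !Deleted.count(Op))
        Stack.emplace_back(Op, Level + 1);
  }
}
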
- SmallVector, 8> Stack(1, {Root, 0}); + SmallVector, 8> Stack(1, {Root, 0}); SmallSet VisitedInstrs; bool Res = false; while (!Stack.empty()) { - Value *V; + Instruction *Inst; unsigned Level; - std::tie(V, Level) = Stack.pop_back_val(); - if (!V) - continue; - auto *Inst = dyn_cast(V); - if (!Inst) - continue; + std::tie(Inst, Level) = Stack.pop_back_val(); auto *BI = dyn_cast(Inst); auto *SI = dyn_cast(Inst); if (BI || SI) { @@ -5490,8 +5589,8 @@ for (auto *Op : Inst->operand_values()) if (VisitedInstrs.insert(Op).second) if (auto *I = dyn_cast(Op)) - if (!isa(I) && I->getParent() == BB) - Stack.emplace_back(Op, Level); + if (!isa(I) && !R.isDeleted(I) && I->getParent() == BB) + Stack.emplace_back(I, Level); } return Res; } @@ -5556,11 +5655,10 @@ } bool SLPVectorizerPass::vectorizeSimpleInstructions( - SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R) { + SmallVectorImpl &Instructions, BasicBlock *BB, BoUpSLP &R) { bool OpsChanged = false; - for (auto &VH : reverse(Instructions)) { - auto *I = dyn_cast_or_null(VH); - if (!I) + for (auto *I : reverse(Instructions)) { + if (R.isDeleted(I)) continue; if (auto *LastInsertValue = dyn_cast(I)) OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R); @@ -5589,7 +5687,7 @@ if (!P) break; - if (!VisitedInstrs.count(P)) + if (!VisitedInstrs.count(P) && !R.isDeleted(P)) Incoming.push_back(P); } @@ -5632,9 +5730,12 @@ VisitedInstrs.clear(); - SmallVector PostProcessInstructions; + SmallVector PostProcessInstructions; SmallDenseSet KeyNodes; for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) { + // Skip instructions marked for the deletion. + if (R.isDeleted(&*it)) + continue; // We may go through BB multiple times so skip the one we have checked. if (!VisitedInstrs.insert(&*it).second) { if (it->use_empty() && KeyNodes.count(&*it) > 0 && @@ -5728,10 +5829,10 @@ SetVector Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, the WeakTrackingVHs will have - // nullified the - // values, so remove them from the set of candidates. - Candidates.remove(nullptr); + // initially collected them. If so, they are marked as deleted, so remove + // them from the set of candidates. + Candidates.remove_if( + [&R](Value *I) { return R.isDeleted(cast(I)); }); // Remove from the set of candidates all pairs of getelementptrs with // constant differences. Such getelementptrs are likely not good @@ -5739,18 +5840,18 @@ // computed from the other. We also ensure all candidate getelementptr // indices are unique. 
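(Illustrative aside, not part of the patch.) The loop below prunes GEP candidates whose addresses differ by a compile-time constant, since one such pointer is cheap to rederive from the other. A deliberately rough standalone model: addresses are a symbolic base plus a constant offset, and sharing a base stands in for the isa-SCEVConstant test on getMinusSCEV in the real code; all names here are invented for illustration.

#include <cstddef>
#include <set>
#include <utility>
#include <vector>

using Addr = std::pair<int /*BaseId*/, long /*Offset*/>;

// Candidates holds indices into GEPs; remove every pair whose difference is a
// known constant, mirroring the shape of the loop that follows.
void removeConstantDiffPairs(const std::vector<Addr> &GEPs,
                             std::set<std::size_t> &Candidates) {
  for (std::size_t I = 0, E = GEPs.size(); I < E && Candidates.size() > 1;
       ++I) {
    if (!Candidates.count(I))
      continue;
    for (std::size_t J = I + 1; J < E && Candidates.size() > 1; ++J) {
      if (GEPs[I].first == GEPs[J].first) { // constant difference
        Candidates.erase(I);
        Candidates.erase(J);
      }
    }
  }
}
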
for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) { - auto *GEPI = cast(GEPList[I]); + auto *GEPI = GEPList[I]; if (!Candidates.count(GEPI)) continue; auto *SCEVI = SE->getSCEV(GEPList[I]); for (int J = I + 1; J < E && Candidates.size() > 1; ++J) { - auto *GEPJ = cast(GEPList[J]); + auto *GEPJ = GEPList[J]; auto *SCEVJ = SE->getSCEV(GEPList[J]); if (isa(SE->getMinusSCEV(SCEVI, SCEVJ))) { - Candidates.remove(GEPList[I]); - Candidates.remove(GEPList[J]); + Candidates.remove(GEPI); + Candidates.remove(GEPJ); } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) { - Candidates.remove(GEPList[J]); + Candidates.remove(GEPJ); } } } Index: test/Transforms/SLPVectorizer/AArch64/gather-root.ll =================================================================== --- test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -15,18 +15,10 @@ ; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: -; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> -; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], undef -; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef -; DEFAULT-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef -; DEFAULT-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef -; DEFAULT-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], undef -; DEFAULT-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], undef -; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]]) -; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], [[TMP17]] -; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef +; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[TMP17]] ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR28330( @@ -47,22 +39,15 @@ ; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0 ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: -; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> ; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 -; GATHER-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP3]] ; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 -; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]] ; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]] ; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]] ; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]] ; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]] ; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]] ; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80 ; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, 
i32 [[TMP3]], i32 0 ; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1 @@ -73,8 +58,7 @@ ; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6 ; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7 ; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]]) -; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], [[TMP17]] -; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]] +; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP13]], [[TMP17]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR28330( @@ -163,18 +147,10 @@ ; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: -; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> -; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 -5, undef -; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef -; DEFAULT-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef -; DEFAULT-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef -; DEFAULT-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], undef -; DEFAULT-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], undef -; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]]) -; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], -5 -; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef +; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5 ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR32038( @@ -195,22 +171,15 @@ ; GATHER-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0 ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: -; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; GATHER-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> , <2 x i32> ; GATHER-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 -; GATHER-NEXT: [[TMP20:%.*]] = add i32 -5, [[TMP3]] ; GATHER-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 -; GATHER-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]] ; GATHER-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]] ; GATHER-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]] ; GATHER-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]] ; GATHER-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]] ; GATHER-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 -; GATHER-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]] ; GATHER-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80 ; GATHER-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0 ; GATHER-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1 @@ -221,8 +190,7 @@ ; GATHER-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6 ; GATHER-NEXT: [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 
[[TMP33]], i32 7 ; GATHER-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]]) -; GATHER-NEXT: [[BIN_EXTRA]] = add i32 [[TMP13]], -5 -; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]] +; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP13]], -5 ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR32038( @@ -230,9 +198,9 @@ ; MAX-COST-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1 ; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer ; MAX-COST-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 -; MAX-COST-NEXT: [[TMPP5:%.*]] = icmp eq i8 [[TMP4]], 0 +; MAX-COST-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 ; MAX-COST-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 -; MAX-COST-NEXT: [[TMPP7:%.*]] = icmp eq i8 [[TMP6]], 0 +; MAX-COST-NEXT: [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0 ; MAX-COST-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 ; MAX-COST-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0 ; MAX-COST-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 @@ -248,23 +216,17 @@ ; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> undef, i1 [[TMP2]], i32 0 ; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 ; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[TMP4]], i32 1 -; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[TMPP5]], i32 2 -; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMPP7]], i32 3 +; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[TMP5]], i32 2 +; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMP7]], i32 3 ; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> , <4 x i32> -; MAX-COST-NEXT: [[TMP20:%.*]] = add i32 -5, undef -; MAX-COST-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef -; MAX-COST-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef -; MAX-COST-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef ; MAX-COST-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]] ; MAX-COST-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP8]]) ; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP27]] ; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[TMP29]] -; MAX-COST-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP11]], -5 -; MAX-COST-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]] +; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5 ; MAX-COST-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[BIN_EXTRA]], [[TMP31]] +; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[OP_EXTRA]], [[TMP31]] ; MAX-COST-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[TMP34]] = add i32 [[TMP32]], [[TMP33]] ; MAX-COST-NEXT: br label [[FOR_BODY]] Index: test/Transforms/SLPVectorizer/X86/PR31847.ll =================================================================== --- /dev/null +++ test/Transforms/SLPVectorizer/X86/PR31847.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: 
opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" + +@shift = common local_unnamed_addr global [10 x i32] zeroinitializer, align 4 +@data = common local_unnamed_addr global [10 x i8*] zeroinitializer, align 4 + +define void @flat(i32 %intensity) { +; CHECK-LABEL: @flat( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 1, [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 [[SHR]] +; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 1, [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 [[SHR1]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> undef, i8 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i8> [[TMP6]], i8 [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i8> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> , [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <2 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <2 x i32> , [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP10]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP12]], i32 1 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[IDX_NEG]] +; CHECK-NEXT: [[TMP15:%.*]] = load i8, i8* [[ADD_PTR]], align 1 +; CHECK-NEXT: [[CONV15:%.*]] = zext i8 [[TMP15]] to i32 +; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]] +; CHECK-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 +; CHECK-NEXT: store i8 [[CONV17]], i8* [[ADD_PTR]], align 1 +; CHECK-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[ADD]] +; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[ADD_PTR18]], align 1 +; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP16]], 0 +; CHECK-NEXT: [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8 +; CHECK-NEXT: store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1 +; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP17]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = 
zext <2 x i8> [[TMP20]] to <2 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = add nsw <2 x i32> , [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt <2 x i32> [[TMP22]], +; CHECK-NEXT: [[TMP24:%.*]] = sub nsw <2 x i32> , [[TMP21]] +; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP23]], <2 x i32> [[TMP22]], <2 x i32> [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i32> [[TMP25]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP25]], i32 1 +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]] +; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[IDX_NEG_1]] +; CHECK-NEXT: [[TMP28:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[CONV15_1:%.*]] = zext i8 [[TMP28]] to i32 +; CHECK-NEXT: [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]] +; CHECK-NEXT: [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8 +; CHECK-NEXT: store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[ADD_1]] +; CHECK-NEXT: [[TMP29:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1 +; CHECK-NEXT: [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP29]], 0 +; CHECK-NEXT: [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8 +; CHECK-NEXT: store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1 +; CHECK-NEXT: [[ADD_PTR23_1]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[TMP1]] +; CHECK-NEXT: [[INC_1]] = add nsw i32 [[Y_045]], 2 +; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4 + %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4 + %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4 + %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4 + %shr = lshr i32 1, %0 + %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr + %shr1 = lshr i32 1, %1 + %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ] + %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ] + %4 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %4 to i32 + %sub = add nsw i32 %conv, -128 + %5 = load i8, i8* %arrayidx2, align 1 + %conv3 = zext i8 %5 to i32 + %sub4 = add nsw i32 %conv3, -128 + %cmp5 = icmp sgt i32 %sub, -1 + %sub7 = sub nsw i32 128, %conv + %cond = select i1 %cmp5, i32 %sub, i32 %sub7 + %cmp8 = icmp sgt i32 %sub4, -1 + %sub12 = sub nsw i32 128, %conv3 + %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12 + %add = add nsw i32 %cond14, %cond + %idx.neg = sub nsw i32 0, %add + %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg + %6 = load i8, i8* %add.ptr, align 1 + %conv15 = zext i8 %6 to i32 + %add16 = add nsw i32 %conv15, %intensity + %conv17 = trunc i32 %add16 to i8 + store i8 %conv17, i8* %add.ptr, align 1 + %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add + %7 = load i8, i8* %add.ptr18, align 1 + %not.tobool = icmp eq i8 %7, 0 + %conv21 = zext i1 %not.tobool to i8 + store i8 %conv21, i8* %add.ptr18, align 1 + %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1 + %8 = load 
i8, i8* %arrayidx, align 1 + %conv.1 = zext i8 %8 to i32 + %sub.1 = add nsw i32 %conv.1, -128 + %9 = load i8, i8* %arrayidx2, align 1 + %conv3.1 = zext i8 %9 to i32 + %sub4.1 = add nsw i32 %conv3.1, -128 + %cmp5.1 = icmp sgt i32 %sub.1, -1 + %sub7.1 = sub nsw i32 128, %conv.1 + %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1 + %cmp8.1 = icmp sgt i32 %sub4.1, -1 + %sub12.1 = sub nsw i32 128, %conv3.1 + %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1 + %add.1 = add nsw i32 %cond14.1, %cond.1 + %idx.neg.1 = sub nsw i32 0, %add.1 + %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1 + %10 = load i8, i8* %add.ptr.1, align 1 + %conv15.1 = zext i8 %10 to i32 + %add16.1 = add nsw i32 %conv15.1, %intensity + %conv17.1 = trunc i32 %add16.1 to i8 + store i8 %conv17.1, i8* %add.ptr.1, align 1 + %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1 + %11 = load i8, i8* %add.ptr18.1, align 1 + %not.tobool.1 = icmp eq i8 %11, 0 + %conv21.1 = zext i1 %not.tobool.1 to i8 + store i8 %conv21.1, i8* %add.ptr18.1, align 1 + %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1 + %inc.1 = add nsw i32 %y.045, 2 + %exitcond.1 = icmp eq i32 %inc.1, 128 + br i1 %exitcond.1, label %for.cond.cleanup, label %for.body +} Index: test/Transforms/SLPVectorizer/X86/horizontal-list.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -100,16 +100,8 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] ; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 ; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float -; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]] -; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]] -; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]] -; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -119,7 +111,6 @@ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] -; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]] ; CHECK-NEXT: store float [[OP_EXTRA5]], float* @res, align 4 ; CHECK-NEXT: ret float [[OP_EXTRA5]] ; @@ -131,16 +122,8 @@ ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; 
THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] ; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 ; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float -; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]] -; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]] -; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]] -; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -150,7 +133,6 @@ ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV6]] -; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]] ; THRESHOLD-NEXT: store float [[OP_EXTRA5]], float* @res, align 4 ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]] ; @@ -205,17 +187,14 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; CHECK-NEXT: store float [[TMP8]], float* @res, align 4 -; CHECK-NEXT: ret float [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; CHECK-NEXT: store float [[TMP5]], float* @res, align 4 +; CHECK-NEXT: ret float [[TMP5]] ; ; THRESHOLD-LABEL: @bazzz( ; THRESHOLD-NEXT: entry: @@ -224,17 +203,14 @@ ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; 
THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; THRESHOLD-NEXT: store float [[TMP8]], float* @res, align 4 -; THRESHOLD-NEXT: ret float [[TMP8]] +; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; THRESHOLD-NEXT: store float [[TMP5]], float* @res, align 4 +; THRESHOLD-NEXT: ret float [[TMP5]] ; entry: %0 = load i32, i32* @n, align 4 @@ -267,16 +243,13 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 ; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4 ; CHECK-NEXT: ret i32 [[CONV4]] ; @@ -287,16 +260,13 @@ ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32 +; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 ; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4 ; THRESHOLD-NEXT: ret i32 [[CONV4]] ; @@ -330,50 +300,30 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP1:%.*]] = 
load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 -; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[TMP5]] -; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 -; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[TMP6]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 -; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef -; CHECK-NEXT: store float [[TMP7]], float* @res, align 4 -; CHECK-NEXT: ret float [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 +; CHECK-NEXT: store float [[TMP3]], float* @res, align 4 +; CHECK-NEXT: ret float [[TMP3]] ; ; THRESHOLD-LABEL: @bar( ; THRESHOLD-NEXT: entry: ; THRESHOLD-NEXT: [[TMP0:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[TMP1]], [[TMP0]] -; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 -; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 -; THRESHOLD-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; THRESHOLD-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float undef, float undef -; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 -; THRESHOLD-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[TMP5]] -; THRESHOLD-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float undef -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 -; THRESHOLD-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[TMP6]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <4 x float> [[TMP2]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP2]], <4 x float> [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[RDX_MINMAX_CMP2:%.*]] = fcmp 
fast ogt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; THRESHOLD-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF1]] -; THRESHOLD-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 -; THRESHOLD-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float undef -; THRESHOLD-NEXT: store float [[TMP7]], float* @res, align 4 -; THRESHOLD-NEXT: ret float [[TMP7]] +; THRESHOLD-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT3]], i32 0 +; THRESHOLD-NEXT: store float [[TMP3]], float* @res, align 4 +; THRESHOLD-NEXT: ret float [[TMP3]] ; entry: %0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16 @@ -418,21 +368,6 @@ ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 @@ -467,37 +402,6 @@ ; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] -; CHECK-NEXT: 
[[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] -; CHECK-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]] -; CHECK-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]] -; CHECK-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]] -; CHECK-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]] -; CHECK-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]] -; CHECK-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]] -; CHECK-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]] -; CHECK-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]] -; CHECK-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]] -; CHECK-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]] -; CHECK-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]] -; CHECK-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]] -; CHECK-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]] -; CHECK-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]] -; CHECK-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -519,7 +423,6 @@ ; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 ; CHECK-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] ; CHECK-NEXT: ret float [[OP_RDX]] ; ; THRESHOLD-LABEL: @f( @@ -541,21 +444,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 @@ -590,37 +478,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 -; 
THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] -; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] -; THRESHOLD-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]] -; THRESHOLD-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]] -; THRESHOLD-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]] -; THRESHOLD-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]] -; THRESHOLD-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]] -; THRESHOLD-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]] -; THRESHOLD-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]] -; THRESHOLD-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]] -; THRESHOLD-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]] -; THRESHOLD-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]] -; THRESHOLD-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]] -; THRESHOLD-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]] -; THRESHOLD-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]] -; THRESHOLD-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]] -; THRESHOLD-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -642,7 +499,6 @@ ; THRESHOLD-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 ; THRESHOLD-NEXT: [[OP_RDX:%.*]] = fadd fast float [[TMP4]], [[TMP5]] -; THRESHOLD-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] ; THRESHOLD-NEXT: ret float [[OP_RDX]] ; entry: @@ -829,37 +685,6 @@ ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; 
CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -872,7 +697,6 @@ ; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] -; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] ; CHECK-NEXT: ret float [[OP_EXTRA]] ; ; THRESHOLD-LABEL: @f1( @@ -912,37 +736,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; THRESHOLD-NEXT: 
[[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -955,7 +748,6 @@ ; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] -; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA]] ; entry: @@ -1066,17 +858,12 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 @@ -1087,14 +874,6 @@ ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; 
CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 @@ -1113,21 +892,6 @@ ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> @@ -1153,7 +917,6 @@ ; CHECK-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] ; CHECK-NEXT: ret float [[TMP12]] ; ; THRESHOLD-LABEL: @loadadd31( @@ -1162,17 +925,12 @@ ; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; 
THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 @@ -1183,14 +941,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 @@ -1209,21 +959,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> @@ -1249,7 +984,6 @@ ; THRESHOLD-NEXT: [[OP_RDX17:%.*]] = fadd fast float [[OP_RDX]], [[TMP10]] ; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[OP_RDX17]], [[TMP1]] ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] ; THRESHOLD-NEXT: ret float [[TMP12]] ; entry: @@ -1360,14 +1094,6 @@ ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = 
bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]] -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1377,7 +1103,6 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[OP_EXTRA5]] ; ; THRESHOLD-LABEL: @extra_args( @@ -1394,14 +1119,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1411,7 +1128,6 @@ ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]] ; entry: @@ -1460,16 +1176,6 @@ ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; CHECK-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00 -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]] -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; CHECK-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00 -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]] -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; CHECK-NEXT: 
[[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1481,7 +1187,6 @@ ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 ; CHECK-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00 ; CHECK-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]] -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[OP_EXTRA7]] ; ; THRESHOLD-LABEL: @extra_args_same_several_times( @@ -1498,16 +1203,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; THRESHOLD-NEXT: [[ADD41:%.*]] = fadd fast float [[ADD4]], 5.000000e+00 -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD41]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; THRESHOLD-NEXT: [[ADD4_11:%.*]] = fadd fast float [[ADD4_1]], 5.000000e+00 -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_11]] -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1519,7 +1214,6 @@ ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], 5.000000e+00 ; THRESHOLD-NEXT: [[OP_EXTRA6:%.*]] = fadd fast float [[OP_EXTRA5]], 5.000000e+00 ; THRESHOLD-NEXT: [[OP_EXTRA7:%.*]] = fadd fast float [[OP_EXTRA6]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA7]] ; entry: @@ -1572,14 +1266,6 @@ ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]] -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]] -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]] -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1589,7 +1275,6 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement 
<8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[OP_EXTRA5]] ; ; THRESHOLD-LABEL: @extra_args_no_replace( @@ -1608,14 +1293,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]] -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]] -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1625,7 +1302,6 @@ ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[OP_EXTRA5:%.*]] = fadd fast float [[OP_EXTRA]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[OP_EXTRA5]] ; entry: @@ -1676,10 +1352,6 @@ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef -; CHECK-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef -; CHECK-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef -; CHECK-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -1687,7 +1359,6 @@ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] ; CHECK-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] -; CHECK-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], undef ; CHECK-NEXT: ret i32 [[OP_EXTRA3]] ; ; THRESHOLD-LABEL: @wobble( @@ -1704,10 +1375,6 @@ ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; THRESHOLD-NEXT: [[R1:%.*]] = add nuw i32 [[ARG]], undef -; THRESHOLD-NEXT: [[R2:%.*]] = add nsw i32 [[R1]], undef -; THRESHOLD-NEXT: [[R3:%.*]] = add nsw i32 [[R2]], undef -; THRESHOLD-NEXT: [[R4:%.*]] = add nsw i32 [[R3]], undef ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = 
shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -1715,7 +1382,6 @@ ; THRESHOLD-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; THRESHOLD-NEXT: [[OP_EXTRA:%.*]] = add nuw i32 [[TMP12]], [[ARG]] ; THRESHOLD-NEXT: [[OP_EXTRA3:%.*]] = add nsw i32 [[OP_EXTRA]], [[TMP9]] -; THRESHOLD-NEXT: [[R5:%.*]] = add nsw i32 [[R4]], undef ; THRESHOLD-NEXT: ret i32 [[OP_EXTRA3]] ; bb: Index: test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -36,27 +36,6 @@ ; ; AVX-LABEL: @maxi8( ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] @@ -66,33 +45,11 @@ ; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; AVX-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX-NEXT: ret i32 [[TMP24]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX-NEXT: ret i32 [[TMP3]] ; ; AVX2-LABEL: @maxi8( ; AVX2-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select 
i1 [[TMP5]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] @@ -102,33 +59,11 @@ ; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; AVX2-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX2-NEXT: ret i32 [[TMP24]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; AVX2-NEXT: ret i32 [[TMP3]] ; ; SKX-LABEL: @maxi8( ; SKX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; SKX-NEXT: 
[[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x i32> [[TMP2]], <8 x i32> [[RDX_SHUF]] @@ -138,9 +73,8 @@ ; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> undef, <8 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <8 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x i32> [[RDX_MINMAX_SELECT3]], <8 x i32> [[RDX_SHUF4]] -; SKX-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 -; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; SKX-NEXT: ret i32 [[TMP24]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[RDX_MINMAX_SELECT6]], i32 0 +; SKX-NEXT: ret i32 [[TMP3]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -169,101 +103,24 @@ define i32 @maxi16(i32) { ; CHECK-LABEL: @maxi16( -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] -; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 8), align 16 -; CHECK-NEXT: [[TMP25:%.*]] 
= icmp sgt i32 [[TMP23]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP23]], i32 [[TMP24]] -; CHECK-NEXT: [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 9), align 4 -; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP26]], [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP26]], i32 [[TMP27]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 10), align 8 -; CHECK-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP29]], i32 [[TMP30]] -; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 11), align 4 -; CHECK-NEXT: [[TMP34:%.*]] = icmp sgt i32 [[TMP32]], [[TMP33]] -; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], i32 [[TMP32]], i32 [[TMP33]] -; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 12), align 16 -; CHECK-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP35]], i32 [[TMP36]] -; CHECK-NEXT: [[TMP39:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 13), align 4 -; CHECK-NEXT: [[TMP40:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], i32 [[TMP38]], i32 [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 14), align 8 -; CHECK-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP41]], i32 [[TMP42]] -; CHECK-NEXT: [[TMP45:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 15), align 4 -; CHECK-NEXT: [[TMP46:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[TMP44]], i32 [[TMP45]] -; CHECK-NEXT: ret i32 [[TMP47]] +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 +; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] +; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> [[RDX_SHUF1]] +; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> [[RDX_SHUF4]] +; CHECK-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] +; CHECK-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] +; CHECK-NEXT: [[TMP3:%.*]] = 
extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; CHECK-NEXT: ret i32 [[TMP3]] ; ; AVX-LABEL: @maxi16( ; AVX-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i32 8 -; AVX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; AVX-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i32 9 -; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; AVX-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10 -; AVX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; AVX-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i32 11 -; AVX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; AVX-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i32 12 -; AVX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; AVX-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i32 13 -; AVX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; AVX-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14 -; AVX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; AVX-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i32 15 -; AVX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = 
select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] @@ -276,57 +133,11 @@ ; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; AVX-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; AVX-NEXT: ret i32 [[TMP48]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX-NEXT: ret i32 [[TMP3]] ; ; AVX2-LABEL: @maxi16( ; AVX2-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX2-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i32 8 -; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; AVX2-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i32 9 -; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; AVX2-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10 -; AVX2-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; AVX2-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i32 11 -; AVX2-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; AVX2-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i32 12 -; AVX2-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; AVX2-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], 
i32 13 -; AVX2-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; AVX2-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14 -; AVX2-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; AVX2-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i32 15 -; AVX2-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] @@ -339,57 +150,11 @@ ; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; AVX2-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX2-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; AVX2-NEXT: ret i32 [[TMP48]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX2-NEXT: ret i32 [[TMP3]] ; ; SKX-LABEL: @maxi16( ; SKX-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; SKX-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i32 8 -; SKX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; SKX-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i32 9 -; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; SKX-NEXT: 
[[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i32 10 -; SKX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; SKX-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i32 11 -; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; SKX-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i32 12 -; SKX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; SKX-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i32 13 -; SKX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; SKX-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i32 14 -; SKX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; SKX-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i32 15 -; SKX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] @@ -402,9 +167,8 @@ ; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; SKX-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; SKX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; SKX-NEXT: ret i32 [[TMP48]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 +; SKX-NEXT: ret i32 [[TMP3]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -458,99 +222,6 @@ define i32 @maxi32(i32) { ; CHECK-LABEL: @maxi32( ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 -; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 -; CHECK-NEXT: [[TMP17:%.*]] = icmp sgt i32 
[[TMP15]], [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 -; CHECK-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 -; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 -; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 -; CHECK-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 -; CHECK-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 -; CHECK-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 -; CHECK-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 -; CHECK-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 -; CHECK-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 -; CHECK-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] -; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 -; CHECK-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef -; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 -; CHECK-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] -; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef -; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 -; CHECK-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] -; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef -; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 -; CHECK-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] -; CHECK-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef -; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 -; CHECK-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] -; CHECK-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 -; CHECK-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] -; CHECK-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef -; 
CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 -; CHECK-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] -; CHECK-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef -; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 -; CHECK-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] -; CHECK-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef -; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 -; CHECK-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] -; CHECK-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef -; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 -; CHECK-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] -; CHECK-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef -; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 -; CHECK-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] -; CHECK-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef -; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 -; CHECK-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] -; CHECK-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef -; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 -; CHECK-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] -; CHECK-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef -; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 -; CHECK-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] -; CHECK-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef -; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 -; CHECK-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] -; CHECK-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef -; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 -; CHECK-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -566,105 +237,11 @@ ; CHECK-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; CHECK-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; CHECK-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; CHECK-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef -; CHECK-NEXT: ret i32 [[TMP96]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; CHECK-NEXT: ret i32 [[TMP3]] ; ; AVX-LABEL: @maxi32( ; AVX-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 -; 
AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 -; AVX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; AVX-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 -; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; AVX-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 -; AVX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; AVX-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 -; AVX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; AVX-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 -; AVX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; AVX-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 -; AVX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; AVX-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 -; AVX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; AVX-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 -; AVX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] -; AVX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; AVX-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 -; AVX-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -; AVX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef -; AVX-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 -; AVX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] -; AVX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef -; AVX-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 -; AVX-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] -; AVX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef -; AVX-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 -; 
AVX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] -; AVX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef -; AVX-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 -; AVX-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] -; AVX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef -; AVX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 -; AVX-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] -; AVX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef -; AVX-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 -; AVX-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] -; AVX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef -; AVX-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 -; AVX-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] -; AVX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef -; AVX-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 -; AVX-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] -; AVX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef -; AVX-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 -; AVX-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] -; AVX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef -; AVX-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 -; AVX-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] -; AVX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef -; AVX-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 -; AVX-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] -; AVX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef -; AVX-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 -; AVX-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] -; AVX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef -; AVX-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 -; AVX-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] -; AVX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef -; AVX-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 -; AVX-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] -; AVX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef -; AVX-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 -; AVX-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -680,105 +257,11 @@ ; AVX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; AVX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; AVX-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef -; AVX-NEXT: ret i32 [[TMP96]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX-NEXT: ret i32 
[[TMP3]] ; ; AVX2-LABEL: @maxi32( ; AVX2-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; AVX2-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 -; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; AVX2-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 -; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; AVX2-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 -; AVX2-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; AVX2-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 -; AVX2-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; AVX2-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 -; AVX2-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; AVX2-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 -; AVX2-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; AVX2-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[TMP2]], i32 14 -; AVX2-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; AVX2-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 -; AVX2-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] -; AVX2-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; AVX2-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 -; AVX2-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -; AVX2-NEXT: [[TMP51:%.*]] = select 
i1 [[TMP50]], i32 [[TMP48]], i32 undef -; AVX2-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 -; AVX2-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] -; AVX2-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef -; AVX2-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 -; AVX2-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] -; AVX2-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef -; AVX2-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 -; AVX2-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] -; AVX2-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef -; AVX2-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 -; AVX2-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] -; AVX2-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef -; AVX2-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 -; AVX2-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] -; AVX2-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef -; AVX2-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 -; AVX2-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] -; AVX2-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef -; AVX2-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 -; AVX2-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] -; AVX2-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef -; AVX2-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 -; AVX2-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] -; AVX2-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef -; AVX2-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 -; AVX2-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] -; AVX2-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef -; AVX2-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 -; AVX2-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] -; AVX2-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef -; AVX2-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 -; AVX2-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] -; AVX2-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef -; AVX2-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 -; AVX2-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] -; AVX2-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef -; AVX2-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 -; AVX2-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] -; AVX2-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef -; AVX2-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 -; AVX2-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] -; AVX2-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef -; AVX2-NEXT: [[TMP94:%.*]] = extractelement <32 x i32> [[TMP2]], i32 31 -; AVX2-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -794,105 +277,11 @@ ; AVX2-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> 
[[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; AVX2-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX2-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef -; AVX2-NEXT: ret i32 [[TMP96]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX2-NEXT: ret i32 [[TMP3]] ; ; SKX-LABEL: @maxi32( ; SKX-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <32 x i32> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <32 x i32> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <32 x i32> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <32 x i32> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <32 x i32> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <32 x i32> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <32 x i32> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 undef -; SKX-NEXT: [[TMP25:%.*]] = extractelement <32 x i32> [[TMP2]], i32 8 -; SKX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP24]], i32 undef -; SKX-NEXT: [[TMP28:%.*]] = extractelement <32 x i32> [[TMP2]], i32 9 -; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP27]], i32 undef -; SKX-NEXT: [[TMP31:%.*]] = extractelement <32 x i32> [[TMP2]], i32 10 -; SKX-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[TMP30]], i32 undef -; SKX-NEXT: [[TMP34:%.*]] = extractelement <32 x i32> [[TMP2]], i32 11 -; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] -; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP33]], i32 undef -; SKX-NEXT: [[TMP37:%.*]] = extractelement <32 x i32> [[TMP2]], i32 12 -; SKX-NEXT: [[TMP38:%.*]] = icmp sgt i32 [[TMP36]], [[TMP37]] -; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[TMP36]], i32 undef -; SKX-NEXT: [[TMP40:%.*]] = extractelement <32 x i32> [[TMP2]], i32 13 -; SKX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] -; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP39]], i32 undef -; SKX-NEXT: [[TMP43:%.*]] = extractelement 
<32 x i32> [[TMP2]], i32 14 -; SKX-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] -; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 [[TMP42]], i32 undef -; SKX-NEXT: [[TMP46:%.*]] = extractelement <32 x i32> [[TMP2]], i32 15 -; SKX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] -; SKX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP45]], i32 undef -; SKX-NEXT: [[TMP49:%.*]] = extractelement <32 x i32> [[TMP2]], i32 16 -; SKX-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -; SKX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[TMP48]], i32 undef -; SKX-NEXT: [[TMP52:%.*]] = extractelement <32 x i32> [[TMP2]], i32 17 -; SKX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] -; SKX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP51]], i32 undef -; SKX-NEXT: [[TMP55:%.*]] = extractelement <32 x i32> [[TMP2]], i32 18 -; SKX-NEXT: [[TMP56:%.*]] = icmp sgt i32 [[TMP54]], [[TMP55]] -; SKX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[TMP54]], i32 undef -; SKX-NEXT: [[TMP58:%.*]] = extractelement <32 x i32> [[TMP2]], i32 19 -; SKX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP57]], [[TMP58]] -; SKX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP57]], i32 undef -; SKX-NEXT: [[TMP61:%.*]] = extractelement <32 x i32> [[TMP2]], i32 20 -; SKX-NEXT: [[TMP62:%.*]] = icmp sgt i32 [[TMP60]], [[TMP61]] -; SKX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], i32 [[TMP60]], i32 undef -; SKX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[TMP2]], i32 21 -; SKX-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[TMP63]], [[TMP64]] -; SKX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], i32 [[TMP63]], i32 undef -; SKX-NEXT: [[TMP67:%.*]] = extractelement <32 x i32> [[TMP2]], i32 22 -; SKX-NEXT: [[TMP68:%.*]] = icmp sgt i32 [[TMP66]], [[TMP67]] -; SKX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], i32 [[TMP66]], i32 undef -; SKX-NEXT: [[TMP70:%.*]] = extractelement <32 x i32> [[TMP2]], i32 23 -; SKX-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[TMP69]], [[TMP70]] -; SKX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[TMP69]], i32 undef -; SKX-NEXT: [[TMP73:%.*]] = extractelement <32 x i32> [[TMP2]], i32 24 -; SKX-NEXT: [[TMP74:%.*]] = icmp sgt i32 [[TMP72]], [[TMP73]] -; SKX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[TMP72]], i32 undef -; SKX-NEXT: [[TMP76:%.*]] = extractelement <32 x i32> [[TMP2]], i32 25 -; SKX-NEXT: [[TMP77:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] -; SKX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], i32 [[TMP75]], i32 undef -; SKX-NEXT: [[TMP79:%.*]] = extractelement <32 x i32> [[TMP2]], i32 26 -; SKX-NEXT: [[TMP80:%.*]] = icmp sgt i32 [[TMP78]], [[TMP79]] -; SKX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[TMP78]], i32 undef -; SKX-NEXT: [[TMP82:%.*]] = extractelement <32 x i32> [[TMP2]], i32 27 -; SKX-NEXT: [[TMP83:%.*]] = icmp sgt i32 [[TMP81]], [[TMP82]] -; SKX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[TMP81]], i32 undef -; SKX-NEXT: [[TMP85:%.*]] = extractelement <32 x i32> [[TMP2]], i32 28 -; SKX-NEXT: [[TMP86:%.*]] = icmp sgt i32 [[TMP84]], [[TMP85]] -; SKX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[TMP84]], i32 undef -; SKX-NEXT: [[TMP88:%.*]] = extractelement <32 x i32> [[TMP2]], i32 29 -; SKX-NEXT: [[TMP89:%.*]] = icmp sgt i32 [[TMP87]], [[TMP88]] -; SKX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[TMP87]], i32 undef -; SKX-NEXT: [[TMP91:%.*]] = extractelement <32 x i32> [[TMP2]], i32 30 -; SKX-NEXT: [[TMP92:%.*]] = icmp sgt i32 [[TMP90]], [[TMP91]] -; SKX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], i32 [[TMP90]], i32 undef -; SKX-NEXT: [[TMP94:%.*]] = 
extractelement <32 x i32> [[TMP2]], i32 31 -; SKX-NEXT: [[TMP95:%.*]] = icmp sgt i32 [[TMP93]], [[TMP94]] ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] @@ -908,9 +297,8 @@ ; SKX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; SKX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; SKX-NEXT: [[TMP96:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; SKX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], i32 [[TMP93]], i32 undef -; SKX-NEXT: ret i32 [[TMP96]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 +; SKX-NEXT: ret i32 [[TMP3]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -1037,27 +425,6 @@ ; ; AVX-LABEL: @maxf8( ; AVX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <8 x float> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] @@ -1067,33 +434,11 @@ ; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> 
[[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]]
-; AVX-NEXT: [[TMP24:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
-; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef
-; AVX-NEXT: ret float [[TMP24]]
+; AVX-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
+; AVX-NEXT: ret float [[TMP3]]
;
; AVX2-LABEL: @maxf8(
; AVX2-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16
-; AVX2-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP2]], i32 0
-; AVX2-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP2]], i32 1
-; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
-; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef
-; AVX2-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP2]], i32 2
-; AVX2-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
-; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef
-; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP2]], i32 3
-; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
-; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef
-; AVX2-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP2]], i32 4
-; AVX2-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
-; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef
-; AVX2-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[TMP2]], i32 5
-; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
-; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef
-; AVX2-NEXT: [[TMP19:%.*]] = extractelement <8 x float> [[TMP2]], i32 6
-; AVX2-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
-; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef
-; AVX2-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP2]], i32 7
-; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]]
; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32>
; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]]
; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]]
@@ -1103,33 +448,11 @@
; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32>
; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]]
; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]]
-; AVX2-NEXT: [[TMP24:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
-; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef
-; AVX2-NEXT: ret float [[TMP24]]
+; AVX2-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0
+; AVX2-NEXT: ret float [[TMP3]]
;
; SKX-LABEL: @maxf8(
; SKX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16
-; SKX-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP2]], i32 0
-; SKX-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP2]], i32 1
-; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
-; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef
-; SKX-NEXT: [[TMP7:%.*]] = extractelement <8 x
float> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <8 x float> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <8 x float> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <8 x float> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <8 x float> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> undef, <8 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <8 x float> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[TMP2]], <8 x float> [[RDX_SHUF]] @@ -1139,9 +462,8 @@ ; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> undef, <8 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = fcmp fast ogt <8 x float> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] ; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <8 x i1> [[RDX_MINMAX_CMP5]], <8 x float> [[RDX_MINMAX_SELECT3]], <8 x float> [[RDX_SHUF4]] -; SKX-NEXT: [[TMP24:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 -; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; SKX-NEXT: ret float [[TMP24]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT6]], i32 0 +; SKX-NEXT: ret float [[TMP3]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -1220,51 +542,6 @@ ; ; AVX-LABEL: @maxf16( ; AVX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP2]], i32 0 -; AVX-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP2]], i32 1 -; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP2]], i32 4 -; AVX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <16 x 
float> [[TMP2]], i32 5 -; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX-NEXT: [[TMP19:%.*]] = extractelement <16 x float> [[TMP2]], i32 6 -; AVX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX-NEXT: [[TMP22:%.*]] = extractelement <16 x float> [[TMP2]], i32 7 -; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX-NEXT: [[TMP25:%.*]] = extractelement <16 x float> [[TMP2]], i32 8 -; AVX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; AVX-NEXT: [[TMP28:%.*]] = extractelement <16 x float> [[TMP2]], i32 9 -; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; AVX-NEXT: [[TMP31:%.*]] = extractelement <16 x float> [[TMP2]], i32 10 -; AVX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; AVX-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP2]], i32 11 -; AVX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; AVX-NEXT: [[TMP37:%.*]] = extractelement <16 x float> [[TMP2]], i32 12 -; AVX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; AVX-NEXT: [[TMP40:%.*]] = extractelement <16 x float> [[TMP2]], i32 13 -; AVX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; AVX-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP2]], i32 14 -; AVX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; AVX-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP2]], i32 15 -; AVX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]] @@ -1277,57 +554,11 @@ ; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; AVX-NEXT: [[TMP48:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; AVX-NEXT: ret float [[TMP48]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX-NEXT: ret float [[TMP3]] ; ; AVX2-LABEL: @maxf16( ; AVX2-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP2]], i32 0 -; 
AVX2-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <16 x float> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <16 x float> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX2-NEXT: [[TMP25:%.*]] = extractelement <16 x float> [[TMP2]], i32 8 -; AVX2-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; AVX2-NEXT: [[TMP28:%.*]] = extractelement <16 x float> [[TMP2]], i32 9 -; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; AVX2-NEXT: [[TMP31:%.*]] = extractelement <16 x float> [[TMP2]], i32 10 -; AVX2-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; AVX2-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP2]], i32 11 -; AVX2-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; AVX2-NEXT: [[TMP37:%.*]] = extractelement <16 x float> [[TMP2]], i32 12 -; AVX2-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; AVX2-NEXT: [[TMP40:%.*]] = extractelement <16 x float> [[TMP2]], i32 13 -; AVX2-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; AVX2-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP2]], i32 14 -; AVX2-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; AVX2-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP2]], i32 15 -; AVX2-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x 
float> [[TMP2]], <16 x float> [[RDX_SHUF]] @@ -1340,57 +571,11 @@ ; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]] -; AVX2-NEXT: [[TMP48:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX2-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; AVX2-NEXT: ret float [[TMP48]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0 +; AVX2-NEXT: ret float [[TMP3]] ; ; SKX-LABEL: @maxf16( ; SKX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <16 x float> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <16 x float> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <16 x float> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <16 x float> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <16 x float> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <16 x float> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <16 x float> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; SKX-NEXT: [[TMP25:%.*]] = extractelement <16 x float> [[TMP2]], i32 8 -; SKX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; SKX-NEXT: [[TMP28:%.*]] = extractelement <16 x float> [[TMP2]], i32 9 -; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; SKX-NEXT: [[TMP31:%.*]] = extractelement <16 x float> [[TMP2]], i32 10 -; SKX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; SKX-NEXT: [[TMP34:%.*]] = extractelement <16 x float> [[TMP2]], i32 11 -; SKX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; SKX-NEXT: [[TMP37:%.*]] = extractelement <16 x float> [[TMP2]], i32 12 -; SKX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; SKX-NEXT: 
[[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef
-; SKX-NEXT: [[TMP40:%.*]] = extractelement <16 x float> [[TMP2]], i32 13
-; SKX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]]
-; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef
-; SKX-NEXT: [[TMP43:%.*]] = extractelement <16 x float> [[TMP2]], i32 14
-; SKX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]]
-; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef
-; SKX-NEXT: [[TMP46:%.*]] = extractelement <16 x float> [[TMP2]], i32 15
-; SKX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]]
; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP2]], <16 x float> undef, <16 x i32>
; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <16 x float> [[TMP2]], [[RDX_SHUF]]
; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x float> [[TMP2]], <16 x float> [[RDX_SHUF]]
@@ -1403,9 +588,8 @@
; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> undef, <16 x i32>
; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = fcmp fast ogt <16 x float> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]]
; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x float> [[RDX_MINMAX_SELECT6]], <16 x float> [[RDX_SHUF7]]
-; SKX-NEXT: [[TMP48:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
-; SKX-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef
-; SKX-NEXT: ret float [[TMP48]]
+; SKX-NEXT: [[TMP3:%.*]] = extractelement <16 x float> [[RDX_MINMAX_SELECT9]], i32 0
+; SKX-NEXT: ret float [[TMP3]]
;
%2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16
%3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4
@@ -1556,99 +740,6 @@
;
; AVX-LABEL: @maxf32(
; AVX-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16
-; AVX-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[TMP2]], i32 0
-; AVX-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[TMP2]], i32 1
-; AVX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
-; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef
-; AVX-NEXT: [[TMP7:%.*]] = extractelement <32 x float> [[TMP2]], i32 2
-; AVX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]]
-; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef
-; AVX-NEXT: [[TMP10:%.*]] = extractelement <32 x float> [[TMP2]], i32 3
-; AVX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]]
-; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef
-; AVX-NEXT: [[TMP13:%.*]] = extractelement <32 x float> [[TMP2]], i32 4
-; AVX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]]
-; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef
-; AVX-NEXT: [[TMP16:%.*]] = extractelement <32 x float> [[TMP2]], i32 5
-; AVX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]]
-; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef
-; AVX-NEXT: [[TMP19:%.*]] = extractelement <32 x float> [[TMP2]], i32 6
-; AVX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]]
-; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef
-; AVX-NEXT: [[TMP22:%.*]] = extractelement <32 x float> [[TMP2]], i32 7
-; AVX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float
[[TMP21]], [[TMP22]] -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX-NEXT: [[TMP25:%.*]] = extractelement <32 x float> [[TMP2]], i32 8 -; AVX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; AVX-NEXT: [[TMP28:%.*]] = extractelement <32 x float> [[TMP2]], i32 9 -; AVX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; AVX-NEXT: [[TMP31:%.*]] = extractelement <32 x float> [[TMP2]], i32 10 -; AVX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; AVX-NEXT: [[TMP34:%.*]] = extractelement <32 x float> [[TMP2]], i32 11 -; AVX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; AVX-NEXT: [[TMP37:%.*]] = extractelement <32 x float> [[TMP2]], i32 12 -; AVX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; AVX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; AVX-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP2]], i32 13 -; AVX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; AVX-NEXT: [[TMP43:%.*]] = extractelement <32 x float> [[TMP2]], i32 14 -; AVX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; AVX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; AVX-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP2]], i32 15 -; AVX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] -; AVX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; AVX-NEXT: [[TMP49:%.*]] = extractelement <32 x float> [[TMP2]], i32 16 -; AVX-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] -; AVX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float undef -; AVX-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP2]], i32 17 -; AVX-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] -; AVX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float undef -; AVX-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP2]], i32 18 -; AVX-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] -; AVX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float undef -; AVX-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP2]], i32 19 -; AVX-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] -; AVX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float undef -; AVX-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP2]], i32 20 -; AVX-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] -; AVX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float undef -; AVX-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP2]], i32 21 -; AVX-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] -; AVX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float undef -; AVX-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP2]], i32 22 -; AVX-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] -; AVX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float undef -; AVX-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP2]], i32 23 -; AVX-NEXT: [[TMP71:%.*]] = fcmp 
fast ogt float [[TMP69]], [[TMP70]] -; AVX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float undef -; AVX-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP2]], i32 24 -; AVX-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] -; AVX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float undef -; AVX-NEXT: [[TMP76:%.*]] = extractelement <32 x float> [[TMP2]], i32 25 -; AVX-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] -; AVX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float undef -; AVX-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP2]], i32 26 -; AVX-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] -; AVX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float undef -; AVX-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP2]], i32 27 -; AVX-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] -; AVX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float undef -; AVX-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP2]], i32 28 -; AVX-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] -; AVX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float undef -; AVX-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP2]], i32 29 -; AVX-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] -; AVX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float undef -; AVX-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP2]], i32 30 -; AVX-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]] -; AVX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float undef -; AVX-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP2]], i32 31 -; AVX-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] @@ -1664,105 +755,11 @@ ; AVX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; AVX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] -; AVX-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], float [[TMP93]], float undef -; AVX-NEXT: ret float [[TMP96]] +; AVX-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX-NEXT: ret float [[TMP3]] ; ; AVX2-LABEL: @maxf32( ; AVX2-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[TMP2]], i32 0 -; AVX2-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[TMP2]], i32 1 -; AVX2-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <32 x float> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <32 x float> 
[[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <32 x float> [[TMP2]], i32 4 -; AVX2-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <32 x float> [[TMP2]], i32 5 -; AVX2-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; AVX2-NEXT: [[TMP19:%.*]] = extractelement <32 x float> [[TMP2]], i32 6 -; AVX2-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; AVX2-NEXT: [[TMP22:%.*]] = extractelement <32 x float> [[TMP2]], i32 7 -; AVX2-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; AVX2-NEXT: [[TMP25:%.*]] = extractelement <32 x float> [[TMP2]], i32 8 -; AVX2-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; AVX2-NEXT: [[TMP28:%.*]] = extractelement <32 x float> [[TMP2]], i32 9 -; AVX2-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; AVX2-NEXT: [[TMP31:%.*]] = extractelement <32 x float> [[TMP2]], i32 10 -; AVX2-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; AVX2-NEXT: [[TMP34:%.*]] = extractelement <32 x float> [[TMP2]], i32 11 -; AVX2-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; AVX2-NEXT: [[TMP37:%.*]] = extractelement <32 x float> [[TMP2]], i32 12 -; AVX2-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; AVX2-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; AVX2-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP2]], i32 13 -; AVX2-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; AVX2-NEXT: [[TMP43:%.*]] = extractelement <32 x float> [[TMP2]], i32 14 -; AVX2-NEXT: [[TMP44:%.*]] = fcmp fast ogt float [[TMP42]], [[TMP43]] -; AVX2-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; AVX2-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP2]], i32 15 -; AVX2-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] -; AVX2-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; AVX2-NEXT: [[TMP49:%.*]] = extractelement <32 x float> [[TMP2]], i32 16 -; AVX2-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] -; AVX2-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float undef -; AVX2-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP2]], i32 17 -; AVX2-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] -; AVX2-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float undef -; AVX2-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP2]], i32 18 -; AVX2-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] -; AVX2-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float undef -; 
AVX2-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP2]], i32 19 -; AVX2-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] -; AVX2-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float undef -; AVX2-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP2]], i32 20 -; AVX2-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] -; AVX2-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float undef -; AVX2-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP2]], i32 21 -; AVX2-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] -; AVX2-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float undef -; AVX2-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP2]], i32 22 -; AVX2-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] -; AVX2-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float undef -; AVX2-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP2]], i32 23 -; AVX2-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]] -; AVX2-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float undef -; AVX2-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP2]], i32 24 -; AVX2-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] -; AVX2-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float undef -; AVX2-NEXT: [[TMP76:%.*]] = extractelement <32 x float> [[TMP2]], i32 25 -; AVX2-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] -; AVX2-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float undef -; AVX2-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP2]], i32 26 -; AVX2-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] -; AVX2-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float undef -; AVX2-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP2]], i32 27 -; AVX2-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] -; AVX2-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float undef -; AVX2-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP2]], i32 28 -; AVX2-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] -; AVX2-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float undef -; AVX2-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP2]], i32 29 -; AVX2-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] -; AVX2-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float undef -; AVX2-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP2]], i32 30 -; AVX2-NEXT: [[TMP92:%.*]] = fcmp fast ogt float [[TMP90]], [[TMP91]] -; AVX2-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float undef -; AVX2-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP2]], i32 31 -; AVX2-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] @@ -1778,105 +775,11 @@ ; AVX2-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> 
[[RDX_SHUF10]] -; AVX2-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX2-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], float [[TMP93]], float undef -; AVX2-NEXT: ret float [[TMP96]] +; AVX2-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; AVX2-NEXT: ret float [[TMP3]] ; ; SKX-LABEL: @maxf32( ; SKX-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[TMP2]], i32 0 -; SKX-NEXT: [[TMP4:%.*]] = extractelement <32 x float> [[TMP2]], i32 1 -; SKX-NEXT: [[TMP5:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float undef, float undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <32 x float> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = fcmp fast ogt float [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <32 x float> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = fcmp fast ogt float [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], float [[TMP9]], float undef -; SKX-NEXT: [[TMP13:%.*]] = extractelement <32 x float> [[TMP2]], i32 4 -; SKX-NEXT: [[TMP14:%.*]] = fcmp fast ogt float [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], float [[TMP12]], float undef -; SKX-NEXT: [[TMP16:%.*]] = extractelement <32 x float> [[TMP2]], i32 5 -; SKX-NEXT: [[TMP17:%.*]] = fcmp fast ogt float [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], float [[TMP15]], float undef -; SKX-NEXT: [[TMP19:%.*]] = extractelement <32 x float> [[TMP2]], i32 6 -; SKX-NEXT: [[TMP20:%.*]] = fcmp fast ogt float [[TMP18]], [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], float [[TMP18]], float undef -; SKX-NEXT: [[TMP22:%.*]] = extractelement <32 x float> [[TMP2]], i32 7 -; SKX-NEXT: [[TMP23:%.*]] = fcmp fast ogt float [[TMP21]], [[TMP22]] -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], float [[TMP21]], float undef -; SKX-NEXT: [[TMP25:%.*]] = extractelement <32 x float> [[TMP2]], i32 8 -; SKX-NEXT: [[TMP26:%.*]] = fcmp fast ogt float [[TMP24]], [[TMP25]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], float [[TMP24]], float undef -; SKX-NEXT: [[TMP28:%.*]] = extractelement <32 x float> [[TMP2]], i32 9 -; SKX-NEXT: [[TMP29:%.*]] = fcmp fast ogt float [[TMP27]], [[TMP28]] -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], float [[TMP27]], float undef -; SKX-NEXT: [[TMP31:%.*]] = extractelement <32 x float> [[TMP2]], i32 10 -; SKX-NEXT: [[TMP32:%.*]] = fcmp fast ogt float [[TMP30]], [[TMP31]] -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP32]], float [[TMP30]], float undef -; SKX-NEXT: [[TMP34:%.*]] = extractelement <32 x float> [[TMP2]], i32 11 -; SKX-NEXT: [[TMP35:%.*]] = fcmp fast ogt float [[TMP33]], [[TMP34]] -; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float [[TMP33]], float undef -; SKX-NEXT: [[TMP37:%.*]] = extractelement <32 x float> [[TMP2]], i32 12 -; SKX-NEXT: [[TMP38:%.*]] = fcmp fast ogt float [[TMP36]], [[TMP37]] -; SKX-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float undef -; SKX-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP2]], i32 13 -; SKX-NEXT: [[TMP41:%.*]] = fcmp fast ogt float [[TMP39]], [[TMP40]] -; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], float [[TMP39]], float undef -; SKX-NEXT: [[TMP43:%.*]] = extractelement <32 x float> [[TMP2]], i32 14 -; SKX-NEXT: [[TMP44:%.*]] = fcmp fast ogt float 
[[TMP42]], [[TMP43]] -; SKX-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], float [[TMP42]], float undef -; SKX-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP2]], i32 15 -; SKX-NEXT: [[TMP47:%.*]] = fcmp fast ogt float [[TMP45]], [[TMP46]] -; SKX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], float [[TMP45]], float undef -; SKX-NEXT: [[TMP49:%.*]] = extractelement <32 x float> [[TMP2]], i32 16 -; SKX-NEXT: [[TMP50:%.*]] = fcmp fast ogt float [[TMP48]], [[TMP49]] -; SKX-NEXT: [[TMP51:%.*]] = select i1 [[TMP50]], float [[TMP48]], float undef -; SKX-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP2]], i32 17 -; SKX-NEXT: [[TMP53:%.*]] = fcmp fast ogt float [[TMP51]], [[TMP52]] -; SKX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], float [[TMP51]], float undef -; SKX-NEXT: [[TMP55:%.*]] = extractelement <32 x float> [[TMP2]], i32 18 -; SKX-NEXT: [[TMP56:%.*]] = fcmp fast ogt float [[TMP54]], [[TMP55]] -; SKX-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], float [[TMP54]], float undef -; SKX-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP2]], i32 19 -; SKX-NEXT: [[TMP59:%.*]] = fcmp fast ogt float [[TMP57]], [[TMP58]] -; SKX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], float [[TMP57]], float undef -; SKX-NEXT: [[TMP61:%.*]] = extractelement <32 x float> [[TMP2]], i32 20 -; SKX-NEXT: [[TMP62:%.*]] = fcmp fast ogt float [[TMP60]], [[TMP61]] -; SKX-NEXT: [[TMP63:%.*]] = select i1 [[TMP62]], float [[TMP60]], float undef -; SKX-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP2]], i32 21 -; SKX-NEXT: [[TMP65:%.*]] = fcmp fast ogt float [[TMP63]], [[TMP64]] -; SKX-NEXT: [[TMP66:%.*]] = select i1 [[TMP65]], float [[TMP63]], float undef -; SKX-NEXT: [[TMP67:%.*]] = extractelement <32 x float> [[TMP2]], i32 22 -; SKX-NEXT: [[TMP68:%.*]] = fcmp fast ogt float [[TMP66]], [[TMP67]] -; SKX-NEXT: [[TMP69:%.*]] = select i1 [[TMP68]], float [[TMP66]], float undef -; SKX-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP2]], i32 23 -; SKX-NEXT: [[TMP71:%.*]] = fcmp fast ogt float [[TMP69]], [[TMP70]] -; SKX-NEXT: [[TMP72:%.*]] = select i1 [[TMP71]], float [[TMP69]], float undef -; SKX-NEXT: [[TMP73:%.*]] = extractelement <32 x float> [[TMP2]], i32 24 -; SKX-NEXT: [[TMP74:%.*]] = fcmp fast ogt float [[TMP72]], [[TMP73]] -; SKX-NEXT: [[TMP75:%.*]] = select i1 [[TMP74]], float [[TMP72]], float undef -; SKX-NEXT: [[TMP76:%.*]] = extractelement <32 x float> [[TMP2]], i32 25 -; SKX-NEXT: [[TMP77:%.*]] = fcmp fast ogt float [[TMP75]], [[TMP76]] -; SKX-NEXT: [[TMP78:%.*]] = select i1 [[TMP77]], float [[TMP75]], float undef -; SKX-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP2]], i32 26 -; SKX-NEXT: [[TMP80:%.*]] = fcmp fast ogt float [[TMP78]], [[TMP79]] -; SKX-NEXT: [[TMP81:%.*]] = select i1 [[TMP80]], float [[TMP78]], float undef -; SKX-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP2]], i32 27 -; SKX-NEXT: [[TMP83:%.*]] = fcmp fast ogt float [[TMP81]], [[TMP82]] -; SKX-NEXT: [[TMP84:%.*]] = select i1 [[TMP83]], float [[TMP81]], float undef -; SKX-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP2]], i32 28 -; SKX-NEXT: [[TMP86:%.*]] = fcmp fast ogt float [[TMP84]], [[TMP85]] -; SKX-NEXT: [[TMP87:%.*]] = select i1 [[TMP86]], float [[TMP84]], float undef -; SKX-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP2]], i32 29 -; SKX-NEXT: [[TMP89:%.*]] = fcmp fast ogt float [[TMP87]], [[TMP88]] -; SKX-NEXT: [[TMP90:%.*]] = select i1 [[TMP89]], float [[TMP87]], float undef -; SKX-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP2]], i32 30 -; SKX-NEXT: [[TMP92:%.*]] = fcmp 
fast ogt float [[TMP90]], [[TMP91]] -; SKX-NEXT: [[TMP93:%.*]] = select i1 [[TMP92]], float [[TMP90]], float undef -; SKX-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP2]], i32 31 -; SKX-NEXT: [[TMP95:%.*]] = fcmp fast ogt float [[TMP93]], [[TMP94]] ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP2]], <32 x float> undef, <32 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <32 x float> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x float> [[TMP2]], <32 x float> [[RDX_SHUF]] @@ -1892,9 +795,8 @@ ; SKX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> undef, <32 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = fcmp fast ogt <32 x float> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] ; SKX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x float> [[RDX_MINMAX_SELECT9]], <32 x float> [[RDX_SHUF10]] -; SKX-NEXT: [[TMP96:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 -; SKX-NEXT: [[TMP97:%.*]] = select i1 [[TMP95]], float [[TMP93]], float undef -; SKX-NEXT: ret float [[TMP96]] +; SKX-NEXT: [[TMP3:%.*]] = extractelement <32 x float> [[RDX_MINMAX_SELECT12]], i32 0 +; SKX-NEXT: ret float [[TMP3]] ; %2 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 %3 = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 @@ -2026,123 +928,84 @@ ; AVX-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 ; AVX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 ; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] -; AVX-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] -; AVX-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]] ; 
AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP13]] -; AVX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP13]] -; AVX-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP16]] -; AVX-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP16]] -; AVX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP25]], [[TMP19]] -; AVX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP25]], i32 [[TMP19]] -; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; AVX-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX-NEXT: [[TMP30:%.*]] = icmp sgt i32 [[TMP27]], [[TMP29]] -; AVX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP27]], i32 [[TMP29]] -; AVX-NEXT: [[TMP32:%.*]] = select i1 [[TMP5]], i32 3, i32 4 -; AVX-NEXT: store i32 [[TMP32]], i32* @var, align 8 -; AVX-NEXT: ret i32 [[TMP31]] +; AVX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP6]] +; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP6]] +; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP7]] +; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP7]] +; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP8]] +; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP8]] +; AVX-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] +; AVX-NEXT: [[TMP19:%.*]] = select i1 [[TMP5]], i32 3, i32 4 +; AVX-NEXT: store i32 [[TMP19]], i32* @var, align 8 +; AVX-NEXT: ret i32 [[TMP18]] ; ; AVX2-LABEL: @maxi8_mutiple_uses( ; AVX2-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16 ; AVX2-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 ; AVX2-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 ; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] -; AVX2-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select 
i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] -; AVX2-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; AVX2-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; AVX2-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX2-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP13]] -; AVX2-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP13]] -; AVX2-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP16]] -; AVX2-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP16]] -; AVX2-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP25]], [[TMP19]] -; AVX2-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP25]], i32 [[TMP19]] -; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; AVX2-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX2-NEXT: [[TMP30:%.*]] = icmp sgt i32 [[TMP27]], [[TMP29]] -; AVX2-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP27]], i32 [[TMP29]] -; AVX2-NEXT: [[TMP32:%.*]] = select i1 [[TMP5]], i32 3, i32 4 -; AVX2-NEXT: store i32 [[TMP32]], i32* @var, align 8 -; AVX2-NEXT: ret i32 [[TMP31]] +; AVX2-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP6]] +; AVX2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP6]] +; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP7]] +; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP7]] +; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP8]] +; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP8]] +; AVX2-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] +; AVX2-NEXT: [[TMP19:%.*]] = select i1 [[TMP5]], i32 3, i32 4 +; AVX2-NEXT: store i32 [[TMP19]], i32* @var, align 8 +; AVX2-NEXT: ret i32 [[TMP18]] ; ; SKX-LABEL: @maxi8_mutiple_uses( ; SKX-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([32 x i32]* @arr to <4 x i32>*), align 16 ; SKX-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0 ; SKX-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1 ; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]] -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 undef, 
i32 undef -; SKX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2 -; SKX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -; SKX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; SKX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 [[TMP13]] -; SKX-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] -; SKX-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; SKX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +; SKX-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; SKX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; SKX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP2]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP2]], <4 x i32> [[RDX_SHUF]] ; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP13]] -; SKX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP13]] -; SKX-NEXT: [[TMP24:%.*]] = icmp sgt i32 [[TMP23]], [[TMP16]] -; SKX-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[TMP23]], i32 [[TMP16]] -; SKX-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP25]], [[TMP19]] -; SKX-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[TMP25]], i32 [[TMP19]] -; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; SKX-NEXT: [[TMP29:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; SKX-NEXT: [[TMP30:%.*]] = icmp sgt i32 [[TMP27]], [[TMP29]] -; SKX-NEXT: [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[TMP27]], i32 [[TMP29]] -; SKX-NEXT: [[TMP32:%.*]] = select i1 [[TMP5]], i32 3, i32 4 -; SKX-NEXT: store i32 [[TMP32]], i32* @var, align 8 -; SKX-NEXT: ret i32 [[TMP31]] +; SKX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; SKX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP6]] +; SKX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP6]] +; SKX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP7]] +; SKX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP7]] +; SKX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP8]] +; SKX-NEXT: 
[[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP8]] +; SKX-NEXT: [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 [[TMP16]] +; SKX-NEXT: [[TMP19:%.*]] = select i1 [[TMP5]], i32 3, i32 4 +; SKX-NEXT: store i32 [[TMP19]], i32* @var, align 8 +; SKX-NEXT: ret i32 [[TMP18]] ; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 @@ -2207,37 +1070,21 @@ ; AVX: pp: ; AVX-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; AVX-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP5]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP5]], i32 undef -; AVX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 -; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3 -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; AVX-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 ; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP19]] -; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP19]] -; AVX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], [[TMP22]] -; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 [[TMP22]] -; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], [[TMP5]] -; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 
[[TMP29]], i32 [[TMP28]], i32 [[TMP5]] -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 [[TMP22]] +; AVX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] +; AVX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] +; AVX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] +; AVX-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]] +; AVX-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]] +; AVX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]] ; AVX-NEXT: ret i32 [[OP_EXTRA]] ; ; AVX2-LABEL: @maxi8_wrong_parent( @@ -2248,37 +1095,21 @@ ; AVX2: pp: ; AVX2-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] ; AVX2-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; AVX2-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[TMP5]], [[TMP7]] -; AVX2-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP5]], i32 undef -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 -; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -; AVX2-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3 -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP15]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -; AVX2-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[TMP18]], i32 [[TMP19]] -; AVX2-NEXT: [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] +; AVX2-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; AVX2-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 ; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], [[TMP19]] -; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 [[TMP19]] -; AVX2-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], [[TMP22]] -; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 [[TMP22]] -; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], 
[[TMP5]] -; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 [[TMP5]] -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP23]], i32 [[TMP21]], i32 [[TMP22]] +; AVX2-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 +; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]] +; AVX2-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]] +; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]] +; AVX2-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 [[TMP8]] +; AVX2-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP13]], [[TMP5]] +; AVX2-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 [[TMP5]] ; AVX2-NEXT: ret i32 [[OP_EXTRA]] ; ; SKX-LABEL: @maxi8_wrong_parent( @@ -2289,45 +1120,29 @@ ; SKX-NEXT: br label [[PP:%.*]] ; SKX: pp: ; SKX-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2) to <4 x i32>*), align 8 -; SKX-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; SKX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; SKX-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SKX-NEXT: [[TMP7:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SKX-NEXT: [[TMP8:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 ; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i32> [[TMP6]], [[RDX_SHUF]] ; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP6]], <4 x i32> [[RDX_SHUF]] ; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> undef, <4 x i32> ; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] ; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP2]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0 -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], [[TMP8]] -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 [[TMP8]] -; SKX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], [[TMP9]] -; SKX-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> undef, i1 [[TMP13]], i32 0 -; SKX-NEXT: [[TMP15:%.*]] = insertelement <2 x i1> [[TMP14]], i1 [[TMP5]], i32 1 -; SKX-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> undef, i32 [[TMP12]], i32 0 -; SKX-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[TMP3]], i32 1 -; SKX-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> undef, i32 [[TMP9]], i32 0 -; SKX-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP4]], i32 1 -; SKX-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP17]], <2 x i32> [[TMP19]] -; SKX-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP20]], i32 1 -; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP7]] -; SKX-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 undef -; SKX-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP6]], i32 1 -; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] -; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP23]], i32 undef -; SKX-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2 -; SKX-NEXT: [[TMP28:%.*]] = icmp sgt i32 
[[TMP26]], [[TMP27]]
-; SKX-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[TMP26]], i32 undef
-; SKX-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3
-; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]]
-; SKX-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP29]], i32 undef
-; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], [[TMP8]]
-; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 [[TMP8]]
-; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], [[TMP9]]
-; SKX-NEXT: [[TMP36:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0
-; SKX-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], [[TMP21]]
-; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 [[TMP21]]
-; SKX-NEXT: [[TMP38:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 [[TMP9]]
+; SKX-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT3]], i32 0
+; SKX-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP9]], [[TMP7]]
+; SKX-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP9]], i32 [[TMP7]]
+; SKX-NEXT: [[TMP12:%.*]] = icmp sgt i32 [[TMP11]], [[TMP8]]
+; SKX-NEXT: [[TMP13:%.*]] = insertelement <2 x i1> undef, i1 [[TMP12]], i32 0
+; SKX-NEXT: [[TMP14:%.*]] = insertelement <2 x i1> [[TMP13]], i1 [[TMP5]], i32 1
+; SKX-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP11]], i32 0
+; SKX-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[TMP3]], i32 1
+; SKX-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> undef, i32 [[TMP8]], i32 0
+; SKX-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP4]], i32 1
+; SKX-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP14]], <2 x i32> [[TMP16]], <2 x i32> [[TMP18]]
+; SKX-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1
+; SKX-NEXT: [[TMP21:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0
+; SKX-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP21]], [[TMP20]]
+; SKX-NEXT: [[OP_EXTRA:%.*]] = select i1 [[TMP22]], i32 [[TMP21]], i32 [[TMP20]]
 ; SKX-NEXT: ret i32 [[OP_EXTRA]]
 ;
 %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16
Index: test/Transforms/SLPVectorizer/X86/reduction_loads.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/reduction_loads.ll
+++ test/Transforms/SLPVectorizer/X86/reduction_loads.ll
@@ -18,13 +18,6 @@
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
 ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> , [[TMP1]]
-; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
-; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
-; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
-; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]]
-; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]]
-; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]]
-; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]]
 ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]]
 ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32>
@@ -33,7 +26,6 @@
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
 ; CHECK-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], [[SUM]]
-; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end:
 ; CHECK-NEXT: ret i32 [[BIN_EXTRA]]