Index: include/llvm/Transforms/Vectorize/SLPVectorizer.h =================================================================== --- include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -40,8 +40,8 @@ struct SLPVectorizerPass : public PassInfoMixin { typedef SmallVector StoreList; typedef MapVector StoreListMap; - typedef SmallVector WeakVHList; - typedef MapVector WeakVHListMap; + typedef SmallVector GEPList; + typedef MapVector GEPListMap; ScalarEvolution *SE = nullptr; TargetTransformInfo *TTI = nullptr; @@ -111,7 +111,7 @@ StoreListMap Stores; /// The getelementptr instructions in a basic block organized by base pointer. - WeakVHListMap GEPs; + GEPListMap GEPs; }; } Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -408,6 +408,18 @@ /// vectorizable. We do not vectorize such trees. bool isTreeTinyAndNotFullyVectorizable(); + /// Correctly replaces \p Old with \p New instruction and marks \p Old for + /// deletion. + void replaceAllUsesWith(Instruction *Old, Instruction *New); + + /// Checks if the instruction is marked for deletion. + bool isDeleted(Instruction *I) const; + + /// Marks values for later deletion. + void markForDeletion(ArrayRef AV); + + ~BoUpSLP(); + private: struct TreeEntry; @@ -577,14 +589,12 @@ /// AliasCache, which can happen if a new instruction is allocated at the /// same address as a previously deleted instruction. void eraseInstruction(Instruction *I) { - I->removeFromParent(); - I->dropAllReferences(); - DeletedInstructions.push_back(std::unique_ptr(I)); + DeletedInstructions.insert(I); } /// Temporary store for deleted instructions. Instructions will be deleted /// eventually when the BoUpSLP is destructed. - SmallVector, 8> DeletedInstructions; + SmallPtrSet DeletedInstructions; /// A list of values that need to extracted out of the tree. /// This list holds pairs of (Internal Scalar : External User). External User @@ -596,7 +606,7 @@ SmallPtrSet EphValues; /// Holds all of the instructions that we gathered. - SetVector GatherSeq; + SetVector GatherSeq; /// A list of blocks that we are going to CSE. SetVector CSEBlocks; @@ -951,6 +961,31 @@ } // end namespace llvm } // end namespace slpvectorizer +BoUpSLP::~BoUpSLP() { + for (auto *I : DeletedInstructions) + I->dropAllReferences(); + for (auto *I : DeletedInstructions) { + assert(I->use_empty() && "trying to erase instruction with users."); + I->eraseFromParent(); + } +} + +void BoUpSLP::replaceAllUsesWith(Instruction *Old, Instruction *New) { + Old->replaceAllUsesWith(New); + eraseInstruction(Old); +} + +bool BoUpSLP::isDeleted(Instruction *I) const { + return DeletedInstructions.count(I) > 0; +} + +void BoUpSLP::markForDeletion(ArrayRef AV) { + for (auto *V : AV) { + if (auto *I = dyn_cast(V)) + DeletedInstructions.insert(I); + } +} + void BoUpSLP::buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst) { MapVector ExternallyUsedValues; @@ -2314,7 +2349,7 @@ // Generate the 'InsertElement' instruction. for (unsigned i = 0; i < Ty->getNumElements(); ++i) { Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); - if (Instruction *Insrt = dyn_cast(Vec)) { + if (auto *Insrt = dyn_cast(Vec)) { GatherSeq.insert(Insrt); CSEBlocks.insert(Insrt->getParent()); @@ -2934,9 +2969,9 @@ assert(Entry->VectorizedValue && "Can't find vectorizable value"); +#ifndef NDEBUG Type *Ty = Scalar->getType(); if (!Ty->isVoidTy()) { -#ifndef NDEBUG for (User *U : Scalar->users()) { DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); @@ -2945,10 +2980,8 @@ is_contained(UserIgnoreList, U)) && "Replacing out-of-tree value with undef"); } -#endif - Value *Undef = UndefValue::get(Ty); - Scalar->replaceAllUsesWith(Undef); } +#endif DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n"); eraseInstruction(cast(Scalar)); } @@ -2963,10 +2996,8 @@ DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size() << " gather sequences instructions.\n"); // LICM InsertElementInst sequences. - for (Instruction *it : GatherSeq) { - InsertElementInst *Insert = dyn_cast(it); - - if (!Insert) + for (auto *Insert : GatherSeq) { + if (isDeleted(Insert)) continue; // Check if this block is inside a loop. @@ -3028,8 +3059,7 @@ for (Instruction *v : Visited) { if (In->isIdenticalTo(v) && DT->dominates(v->getParent(), In->getParent())) { - In->replaceAllUsesWith(v); - eraseInstruction(In); + replaceAllUsesWith(In, v); In = nullptr; break; } @@ -4257,11 +4287,10 @@ // After successfull horizontal reduction vectorization attempt for PHI node // vectorizer tries to update root binary op by combining vectorized tree and // the ReductionPHI node. But during vectorization this ReductionPHI can be - // vectorized itself and replaced by the undef value, while the instruction - // itself is marked for deletion. This 'marked for deletion' PHI node then can - // be used in new binary operation, causing "Use still stuck around after Def - // is destroyed" crash upon PHI node deletion. - WeakVH ReductionPHI; + // vectorized itself, while the instruction itself is marked for deletion. + // This 'marked for deletion' PHI node then can be used in new binary + // operation. + PHINode *ReductionPHI; /// The opcode of the reduction. Instruction::BinaryOps ReductionOpcode = Instruction::BinaryOpsEnd; @@ -4470,6 +4499,10 @@ // Emit a reduction. Value *ReducedSubTree = emitReduction(VectorizedRoot, Builder, ReduxWidth); + // Mark all scalar reduction ops for deletion, they are replaced by the + // vector reductions (except for ReductionRoot node). + V.markForDeletion( + makeArrayRef(ReductionOps.begin(), std::prev(ReductionOps.end()))); if (VectorizedTree) { Builder.SetCurrentDebugLocation(Loc); VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree, @@ -4493,13 +4526,13 @@ VectorizedTree = Builder.CreateBinOp(ReductionOpcode, VectorizedTree, Pair.first, "bin.extra"); } - // Update users. - if (ReductionPHI && !isa(ReductionPHI)) { + // Update users if ReductionPHI is not vectorized itself. + if (ReductionPHI && !V.isDeleted(ReductionPHI)) { assert(ReductionRoot && "Need a reduction operation"); ReductionRoot->setOperand(0, VectorizedTree); ReductionRoot->setOperand(1, ReductionPHI); } else - ReductionRoot->replaceAllUsesWith(VectorizedTree); + V.replaceAllUsesWith(ReductionRoot, cast(VectorizedTree)); } return VectorizedTree != nullptr; } @@ -4832,7 +4865,7 @@ if (!P) break; - if (!VisitedInstrs.count(P)) + if (!VisitedInstrs.count(P) && !R.isDeleted(P)) Incoming.push_back(P); } @@ -4871,7 +4904,7 @@ for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) { // We may go through BB multiple times so skip the one we have checked. - if (!VisitedInstrs.insert(&*it).second) + if (!VisitedInstrs.insert(&*it).second || R.isDeleted(&*it)) continue; if (isa(it)) @@ -5011,12 +5044,13 @@ // SetVector here to preserve program order. If the index computations // are vectorizable and begin with loads, we want to minimize the chance // of having to reorder them later. - SetVector Candidates(GEPList.begin(), GEPList.end()); + SetVector Candidates(GEPList.begin(), GEPList.end()); // Some of the candidates may have already been vectorized after we - // initially collected them. If so, the WeakVHs will have nullified the - // values, so remove them from the set of candidates. - Candidates.remove(nullptr); + // initially collected them. If so, they are marked as deleted, so remove + // them from the set of candidates. + Candidates.remove_if( + [&R](GetElementPtrInst *I) { return R.isDeleted(I); }); // Remove from the set of candidates all pairs of getelementptrs with // constant differences. Such getelementptrs are likely not good @@ -5024,18 +5058,18 @@ // computed from the other. We also ensure all candidate getelementptr // indices are unique. for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) { - auto *GEPI = cast(GEPList[I]); + auto *GEPI = GEPList[I]; if (!Candidates.count(GEPI)) continue; auto *SCEVI = SE->getSCEV(GEPList[I]); for (int J = I + 1; J < E && Candidates.size() > 1; ++J) { - auto *GEPJ = cast(GEPList[J]); + auto *GEPJ = GEPList[J]; auto *SCEVJ = SE->getSCEV(GEPList[J]); if (isa(SE->getMinusSCEV(SCEVI, SCEVJ))) { - Candidates.remove(GEPList[I]); - Candidates.remove(GEPList[J]); + Candidates.remove(GEPI); + Candidates.remove(GEPJ); } else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) { - Candidates.remove(GEPList[J]); + Candidates.remove(GEPJ); } } } @@ -5050,8 +5084,7 @@ // the getelementptrs. SmallVector Bundle(Candidates.size()); auto BundleIndex = 0u; - for (auto *V : Candidates) { - auto *GEP = cast(V); + for (auto *GEP : Candidates) { auto *GEPIdx = GEP->idx_begin()->get(); assert(GEP->getNumIndices() == 1 || !isa(GEPIdx)); Bundle[BundleIndex++] = GEPIdx; Index: test/Transforms/SLPVectorizer/X86/crash-SCEV.ll =================================================================== --- /dev/null +++ test/Transforms/SLPVectorizer/X86/crash-SCEV.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" + +@shift = common local_unnamed_addr global [10 x i32] zeroinitializer, align 4 +@data = common local_unnamed_addr global [10 x i8*] zeroinitializer, align 4 + +define void @flat(i32 %intensity) { +; CHECK-LABEL: @flat( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 1, [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 [[SHR]] +; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 1, [[TMP1]] +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 [[SHR1]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> undef, i8 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i8> [[TMP6]], i8 [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i8> [[TMP7]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i32> , [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt <2 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP11:%.*]] = sub nsw <2 x i32> , [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP10]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i32> [[TMP12]], i32 1 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[IDX_NEG]] +; CHECK-NEXT: [[TMP15:%.*]] = load i8, i8* [[ADD_PTR]], align 1 +; CHECK-NEXT: [[CONV15:%.*]] = zext i8 [[TMP15]] to i32 +; CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]] +; CHECK-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 +; CHECK-NEXT: store i8 [[CONV17]], i8* [[ADD_PTR]], align 1 +; CHECK-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[ADD]] +; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[ADD_PTR18]], align 1 +; CHECK-NEXT: [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP16]], 0 +; CHECK-NEXT: [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8 +; CHECK-NEXT: store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1 +; CHECK-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP17]], i32 1 +; CHECK-NEXT: [[TMP21:%.*]] = zext <2 x i8> [[TMP20]] to <2 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = add nsw <2 x i32> , [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp sgt <2 x i32> [[TMP22]], +; CHECK-NEXT: [[TMP24:%.*]] = sub nsw <2 x i32> , [[TMP21]] +; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP23]], <2 x i32> [[TMP22]], <2 x i32> [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i32> [[TMP25]], i32 0 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP25]], i32 1 +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +; CHECK-NEXT: [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]] +; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[IDX_NEG_1]] +; CHECK-NEXT: [[TMP28:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[CONV15_1:%.*]] = zext i8 [[TMP28]] to i32 +; CHECK-NEXT: [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]] +; CHECK-NEXT: [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8 +; CHECK-NEXT: store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1 +; CHECK-NEXT: [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[ADD_1]] +; CHECK-NEXT: [[TMP29:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1 +; CHECK-NEXT: [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP29]], 0 +; CHECK-NEXT: [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8 +; CHECK-NEXT: store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1 +; CHECK-NEXT: [[ADD_PTR23_1]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[TMP1]] +; CHECK-NEXT: [[INC_1]] = add nsw i32 [[Y_045]], 2 +; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128 +; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4 + %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4 + %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4 + %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4 + %shr = lshr i32 1, %0 + %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr + %shr1 = lshr i32 1, %1 + %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ] + %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ] + %4 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %4 to i32 + %sub = add nsw i32 %conv, -128 + %5 = load i8, i8* %arrayidx2, align 1 + %conv3 = zext i8 %5 to i32 + %sub4 = add nsw i32 %conv3, -128 + %cmp5 = icmp sgt i32 %sub, -1 + %sub7 = sub nsw i32 128, %conv + %cond = select i1 %cmp5, i32 %sub, i32 %sub7 + %cmp8 = icmp sgt i32 %sub4, -1 + %sub12 = sub nsw i32 128, %conv3 + %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12 + %add = add nsw i32 %cond14, %cond + %idx.neg = sub nsw i32 0, %add + %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg + %6 = load i8, i8* %add.ptr, align 1 + %conv15 = zext i8 %6 to i32 + %add16 = add nsw i32 %conv15, %intensity + %conv17 = trunc i32 %add16 to i8 + store i8 %conv17, i8* %add.ptr, align 1 + %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add + %7 = load i8, i8* %add.ptr18, align 1 + %not.tobool = icmp eq i8 %7, 0 + %conv21 = zext i1 %not.tobool to i8 + store i8 %conv21, i8* %add.ptr18, align 1 + %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1 + %8 = load i8, i8* %arrayidx, align 1 + %conv.1 = zext i8 %8 to i32 + %sub.1 = add nsw i32 %conv.1, -128 + %9 = load i8, i8* %arrayidx2, align 1 + %conv3.1 = zext i8 %9 to i32 + %sub4.1 = add nsw i32 %conv3.1, -128 + %cmp5.1 = icmp sgt i32 %sub.1, -1 + %sub7.1 = sub nsw i32 128, %conv.1 + %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1 + %cmp8.1 = icmp sgt i32 %sub4.1, -1 + %sub12.1 = sub nsw i32 128, %conv3.1 + %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1 + %add.1 = add nsw i32 %cond14.1, %cond.1 + %idx.neg.1 = sub nsw i32 0, %add.1 + %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1 + %10 = load i8, i8* %add.ptr.1, align 1 + %conv15.1 = zext i8 %10 to i32 + %add16.1 = add nsw i32 %conv15.1, %intensity + %conv17.1 = trunc i32 %add16.1 to i8 + store i8 %conv17.1, i8* %add.ptr.1, align 1 + %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1 + %11 = load i8, i8* %add.ptr18.1, align 1 + %not.tobool.1 = icmp eq i8 %11, 0 + %conv21.1 = zext i1 %not.tobool.1 to i8 + store i8 %conv21.1, i8* %add.ptr18.1, align 1 + %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1 + %inc.1 = add nsw i32 %y.045, 2 + %exitcond.1 = icmp eq i32 %inc.1, 128 + br i1 %exitcond.1, label %for.cond.cleanup, label %for.body +} Index: test/Transforms/SLPVectorizer/X86/horizontal-list.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/horizontal-list.ll +++ test/Transforms/SLPVectorizer/X86/horizontal-list.ll @@ -100,16 +100,8 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] ; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 ; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float -; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]] -; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]] -; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]] -; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -119,7 +111,6 @@ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] ; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV6]] -; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]] ; CHECK-NEXT: store float [[BIN_EXTRA5]], float* @res, align 4 ; CHECK-NEXT: ret float [[BIN_EXTRA5]] ; @@ -131,16 +122,8 @@ ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr to <8 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([20 x float]* @arr1 to <8 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <8 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] ; THRESHOLD-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2 ; THRESHOLD-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float -; THRESHOLD-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]] -; THRESHOLD-NEXT: [[ADD19:%.*]] = fadd fast float undef, [[ADD7]] -; THRESHOLD-NEXT: [[ADD19_1:%.*]] = fadd fast float undef, [[ADD19]] -; THRESHOLD-NEXT: [[ADD19_2:%.*]] = fadd fast float undef, [[ADD19_1]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -150,7 +133,6 @@ ; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP4]], [[CONV]] ; THRESHOLD-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV6]] -; THRESHOLD-NEXT: [[ADD19_3:%.*]] = fadd fast float undef, [[ADD19_2]] ; THRESHOLD-NEXT: store float [[BIN_EXTRA5]], float* @res, align 4 ; THRESHOLD-NEXT: ret float [[BIN_EXTRA5]] ; @@ -205,17 +187,14 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; CHECK-NEXT: store float [[TMP8]], float* @res, align 4 -; CHECK-NEXT: ret float [[TMP8]] +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; CHECK-NEXT: store float [[TMP5]], float* @res, align 4 +; CHECK-NEXT: ret float [[TMP5]] ; ; THRESHOLD-LABEL: @bazzz( ; THRESHOLD-NEXT: entry: @@ -224,17 +203,14 @@ ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; THRESHOLD-NEXT: store float [[TMP8]], float* @res, align 4 -; THRESHOLD-NEXT: ret float [[TMP8]] +; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; THRESHOLD-NEXT: store float [[TMP5]], float* @res, align 4 +; THRESHOLD-NEXT: ret float [[TMP5]] ; entry: %0 = load i32, i32* @n, align 4 @@ -267,16 +243,13 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 ; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4 ; CHECK-NEXT: ret i32 [[CONV4]] ; @@ -287,16 +260,13 @@ ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([20 x float]* @arr1 to <4 x float>*), align 16 ; THRESHOLD-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[TMP2]], [[TMP1]] -; THRESHOLD-NEXT: [[TMP4:%.*]] = fadd fast float undef, undef -; THRESHOLD-NEXT: [[TMP5:%.*]] = fadd fast float undef, [[TMP4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]] -; THRESHOLD-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 -; THRESHOLD-NEXT: [[TMP7:%.*]] = fadd fast float undef, [[TMP5]] -; THRESHOLD-NEXT: [[TMP8:%.*]] = fmul fast float [[CONV]], [[TMP6]] -; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP8]] to i32 +; THRESHOLD-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0 +; THRESHOLD-NEXT: [[TMP5:%.*]] = fmul fast float [[CONV]], [[TMP4]] +; THRESHOLD-NEXT: [[CONV4:%.*]] = fptosi float [[TMP5]] to i32 ; THRESHOLD-NEXT: store i32 [[CONV4]], i32* @n, align 4 ; THRESHOLD-NEXT: ret i32 [[CONV4]] ; @@ -412,21 +382,6 @@ ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; CHECK-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 @@ -461,37 +416,6 @@ ; CHECK-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] -; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] -; CHECK-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]] -; CHECK-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]] -; CHECK-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]] -; CHECK-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]] -; CHECK-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]] -; CHECK-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]] -; CHECK-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]] -; CHECK-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]] -; CHECK-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]] -; CHECK-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]] -; CHECK-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]] -; CHECK-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]] -; CHECK-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]] -; CHECK-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]] -; CHECK-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -513,7 +437,6 @@ ; CHECK-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 ; CHECK-NEXT: [[BIN_RDX17:%.*]] = fadd fast float [[TMP4]], [[TMP5]] -; CHECK-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] ; CHECK-NEXT: ret float [[BIN_RDX17]] ; ; THRESHOLD-LABEL: @f( @@ -535,21 +458,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, undef -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_17:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 ; THRESHOLD-NEXT: [[ARRAYIDX_18:%.*]] = getelementptr inbounds float, float* [[X]], i64 18 @@ -584,37 +492,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_47:%.*]] = getelementptr inbounds float, float* [[X]], i64 47 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_16]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <32 x float>, <32 x float>* [[TMP2]], align 4 -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] -; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] -; THRESHOLD-NEXT: [[ADD_32:%.*]] = fadd fast float undef, [[ADD_31]] -; THRESHOLD-NEXT: [[ADD_33:%.*]] = fadd fast float undef, [[ADD_32]] -; THRESHOLD-NEXT: [[ADD_34:%.*]] = fadd fast float undef, [[ADD_33]] -; THRESHOLD-NEXT: [[ADD_35:%.*]] = fadd fast float undef, [[ADD_34]] -; THRESHOLD-NEXT: [[ADD_36:%.*]] = fadd fast float undef, [[ADD_35]] -; THRESHOLD-NEXT: [[ADD_37:%.*]] = fadd fast float undef, [[ADD_36]] -; THRESHOLD-NEXT: [[ADD_38:%.*]] = fadd fast float undef, [[ADD_37]] -; THRESHOLD-NEXT: [[ADD_39:%.*]] = fadd fast float undef, [[ADD_38]] -; THRESHOLD-NEXT: [[ADD_40:%.*]] = fadd fast float undef, [[ADD_39]] -; THRESHOLD-NEXT: [[ADD_41:%.*]] = fadd fast float undef, [[ADD_40]] -; THRESHOLD-NEXT: [[ADD_42:%.*]] = fadd fast float undef, [[ADD_41]] -; THRESHOLD-NEXT: [[ADD_43:%.*]] = fadd fast float undef, [[ADD_42]] -; THRESHOLD-NEXT: [[ADD_44:%.*]] = fadd fast float undef, [[ADD_43]] -; THRESHOLD-NEXT: [[ADD_45:%.*]] = fadd fast float undef, [[ADD_44]] -; THRESHOLD-NEXT: [[ADD_46:%.*]] = fadd fast float undef, [[ADD_45]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP3]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP3]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -636,7 +513,6 @@ ; THRESHOLD-NEXT: [[BIN_RDX16:%.*]] = fadd fast <16 x float> [[BIN_RDX14]], [[RDX_SHUF15]] ; THRESHOLD-NEXT: [[TMP5:%.*]] = extractelement <16 x float> [[BIN_RDX16]], i32 0 ; THRESHOLD-NEXT: [[BIN_RDX17:%.*]] = fadd fast float [[TMP4]], [[TMP5]] -; THRESHOLD-NEXT: [[ADD_47:%.*]] = fadd fast float undef, [[ADD_46]] ; THRESHOLD-NEXT: ret float [[BIN_RDX17]] ; entry: @@ -823,37 +699,6 @@ ; CHECK-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; CHECK-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -866,7 +711,6 @@ ; CHECK-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] -; CHECK-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] ; CHECK-NEXT: ret float [[BIN_EXTRA]] ; ; THRESHOLD-LABEL: @f1( @@ -906,37 +750,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_31:%.*]] = getelementptr inbounds float, float* [[X]], i64 31 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <32 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <32 x float>, <32 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD:%.*]] = fadd fast float undef, [[CONV]] -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] -; THRESHOLD-NEXT: [[ADD_30:%.*]] = fadd fast float undef, [[ADD_29]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x float> [[TMP1]], <32 x float> undef, <32 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <32 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x float> [[BIN_RDX]], <32 x float> undef, <32 x i32> @@ -949,7 +762,6 @@ ; THRESHOLD-NEXT: [[BIN_RDX8:%.*]] = fadd fast <32 x float> [[BIN_RDX6]], [[RDX_SHUF7]] ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <32 x float> [[BIN_RDX8]], i32 0 ; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[CONV]] -; THRESHOLD-NEXT: [[ADD_31:%.*]] = fadd fast float undef, [[ADD_30]] ; THRESHOLD-NEXT: ret float [[BIN_EXTRA]] ; entry: @@ -1060,17 +872,12 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 -; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; CHECK-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 @@ -1081,14 +888,6 @@ ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; CHECK-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 -; CHECK-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; CHECK-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; CHECK-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; CHECK-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; CHECK-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; CHECK-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; CHECK-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; CHECK-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] ; CHECK-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; CHECK-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; CHECK-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 @@ -1107,21 +906,6 @@ ; CHECK-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; CHECK-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 -; CHECK-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; CHECK-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; CHECK-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; CHECK-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; CHECK-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; CHECK-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; CHECK-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; CHECK-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; CHECK-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; CHECK-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; CHECK-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; CHECK-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; CHECK-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; CHECK-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; CHECK-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> @@ -1147,7 +931,6 @@ ; CHECK-NEXT: [[BIN_RDX18:%.*]] = fadd fast float [[BIN_RDX13]], [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[BIN_RDX18]], [[TMP1]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] -; CHECK-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] ; CHECK-NEXT: ret float [[TMP12]] ; ; THRESHOLD-LABEL: @loadadd31( @@ -1156,17 +939,12 @@ ; THRESHOLD-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4 ; THRESHOLD-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, float* [[X]], i64 2 ; THRESHOLD-NEXT: [[TMP1:%.*]] = load float, float* [[ARRAYIDX_1]], align 4 -; THRESHOLD-NEXT: [[ADD_1:%.*]] = fadd fast float [[TMP1]], [[TMP0]] ; THRESHOLD-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, float* [[X]], i64 3 ; THRESHOLD-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, float* [[X]], i64 4 ; THRESHOLD-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, float* [[X]], i64 5 ; THRESHOLD-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, float* [[X]], i64 6 ; THRESHOLD-NEXT: [[TMP2:%.*]] = bitcast float* [[ARRAYIDX_2]] to <4 x float>* ; THRESHOLD-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 -; THRESHOLD-NEXT: [[ADD_2:%.*]] = fadd fast float undef, [[ADD_1]] -; THRESHOLD-NEXT: [[ADD_3:%.*]] = fadd fast float undef, [[ADD_2]] -; THRESHOLD-NEXT: [[ADD_4:%.*]] = fadd fast float undef, [[ADD_3]] -; THRESHOLD-NEXT: [[ADD_5:%.*]] = fadd fast float undef, [[ADD_4]] ; THRESHOLD-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, float* [[X]], i64 8 ; THRESHOLD-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds float, float* [[X]], i64 9 @@ -1177,14 +955,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds float, float* [[X]], i64 14 ; THRESHOLD-NEXT: [[TMP4:%.*]] = bitcast float* [[ARRAYIDX_6]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* [[TMP4]], align 4 -; THRESHOLD-NEXT: [[ADD_6:%.*]] = fadd fast float undef, [[ADD_5]] -; THRESHOLD-NEXT: [[ADD_7:%.*]] = fadd fast float undef, [[ADD_6]] -; THRESHOLD-NEXT: [[ADD_8:%.*]] = fadd fast float undef, [[ADD_7]] -; THRESHOLD-NEXT: [[ADD_9:%.*]] = fadd fast float undef, [[ADD_8]] -; THRESHOLD-NEXT: [[ADD_10:%.*]] = fadd fast float undef, [[ADD_9]] -; THRESHOLD-NEXT: [[ADD_11:%.*]] = fadd fast float undef, [[ADD_10]] -; THRESHOLD-NEXT: [[ADD_12:%.*]] = fadd fast float undef, [[ADD_11]] -; THRESHOLD-NEXT: [[ADD_13:%.*]] = fadd fast float undef, [[ADD_12]] ; THRESHOLD-NEXT: [[ARRAYIDX_14:%.*]] = getelementptr inbounds float, float* [[X]], i64 15 ; THRESHOLD-NEXT: [[ARRAYIDX_15:%.*]] = getelementptr inbounds float, float* [[X]], i64 16 ; THRESHOLD-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds float, float* [[X]], i64 17 @@ -1203,21 +973,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX_29:%.*]] = getelementptr inbounds float, float* [[X]], i64 30 ; THRESHOLD-NEXT: [[TMP6:%.*]] = bitcast float* [[ARRAYIDX_14]] to <16 x float>* ; THRESHOLD-NEXT: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[TMP6]], align 4 -; THRESHOLD-NEXT: [[ADD_14:%.*]] = fadd fast float undef, [[ADD_13]] -; THRESHOLD-NEXT: [[ADD_15:%.*]] = fadd fast float undef, [[ADD_14]] -; THRESHOLD-NEXT: [[ADD_16:%.*]] = fadd fast float undef, [[ADD_15]] -; THRESHOLD-NEXT: [[ADD_17:%.*]] = fadd fast float undef, [[ADD_16]] -; THRESHOLD-NEXT: [[ADD_18:%.*]] = fadd fast float undef, [[ADD_17]] -; THRESHOLD-NEXT: [[ADD_19:%.*]] = fadd fast float undef, [[ADD_18]] -; THRESHOLD-NEXT: [[ADD_20:%.*]] = fadd fast float undef, [[ADD_19]] -; THRESHOLD-NEXT: [[ADD_21:%.*]] = fadd fast float undef, [[ADD_20]] -; THRESHOLD-NEXT: [[ADD_22:%.*]] = fadd fast float undef, [[ADD_21]] -; THRESHOLD-NEXT: [[ADD_23:%.*]] = fadd fast float undef, [[ADD_22]] -; THRESHOLD-NEXT: [[ADD_24:%.*]] = fadd fast float undef, [[ADD_23]] -; THRESHOLD-NEXT: [[ADD_25:%.*]] = fadd fast float undef, [[ADD_24]] -; THRESHOLD-NEXT: [[ADD_26:%.*]] = fadd fast float undef, [[ADD_25]] -; THRESHOLD-NEXT: [[ADD_27:%.*]] = fadd fast float undef, [[ADD_26]] -; THRESHOLD-NEXT: [[ADD_28:%.*]] = fadd fast float undef, [[ADD_27]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> undef, <16 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x float> [[TMP7]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x float> [[BIN_RDX]], <16 x float> undef, <16 x i32> @@ -1243,7 +998,6 @@ ; THRESHOLD-NEXT: [[BIN_RDX18:%.*]] = fadd fast float [[BIN_RDX13]], [[TMP10]] ; THRESHOLD-NEXT: [[TMP11:%.*]] = fadd fast float [[BIN_RDX18]], [[TMP1]] ; THRESHOLD-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP11]], [[TMP0]] -; THRESHOLD-NEXT: [[ADD_29:%.*]] = fadd fast float undef, [[ADD_28]] ; THRESHOLD-NEXT: ret float [[TMP12]] ; entry: @@ -1354,14 +1108,6 @@ ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]] -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1371,7 +1117,6 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV]] -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[BIN_EXTRA5]] ; ; THRESHOLD-LABEL: @extra_args( @@ -1388,14 +1133,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD5]] -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD4_3]] -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1405,7 +1142,6 @@ ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[BIN_EXTRA5]] ; entry: @@ -1456,14 +1192,6 @@ ; CHECK-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; CHECK-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; CHECK-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]] -; CHECK-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; CHECK-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; CHECK-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]] -; CHECK-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]] -; CHECK-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1473,7 +1201,6 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; CHECK-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV]] -; CHECK-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; CHECK-NEXT: ret float [[BIN_EXTRA5]] ; ; THRESHOLD-LABEL: @extra_args_no_replace( @@ -1492,14 +1219,6 @@ ; THRESHOLD-NEXT: [[ARRAYIDX3_6:%.*]] = getelementptr inbounds float, float* [[X]], i64 7 ; THRESHOLD-NEXT: [[TMP0:%.*]] = bitcast float* [[X]] to <8 x float>* ; THRESHOLD-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* [[TMP0]], align 4 -; THRESHOLD-NEXT: [[ADD1:%.*]] = fadd fast float undef, [[ADD]] -; THRESHOLD-NEXT: [[ADD4:%.*]] = fadd fast float undef, [[ADD1]] -; THRESHOLD-NEXT: [[ADD4_1:%.*]] = fadd fast float undef, [[ADD4]] -; THRESHOLD-NEXT: [[ADD4_2:%.*]] = fadd fast float undef, [[ADD4_1]] -; THRESHOLD-NEXT: [[ADD4_3:%.*]] = fadd fast float undef, [[ADD4_2]] -; THRESHOLD-NEXT: [[ADD5:%.*]] = fadd fast float [[ADD4_3]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_4:%.*]] = fadd fast float undef, [[ADD5]] -; THRESHOLD-NEXT: [[ADD4_5:%.*]] = fadd fast float undef, [[ADD4_4]] ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = fadd fast <8 x float> [[TMP1]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x float> [[BIN_RDX]], <8 x float> undef, <8 x i32> @@ -1509,7 +1228,6 @@ ; THRESHOLD-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[BIN_RDX4]], i32 0 ; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = fadd fast float [[TMP2]], [[ADD]] ; THRESHOLD-NEXT: [[BIN_EXTRA5:%.*]] = fadd fast float [[BIN_EXTRA]], [[CONV]] -; THRESHOLD-NEXT: [[ADD4_6:%.*]] = fadd fast float undef, [[ADD4_5]] ; THRESHOLD-NEXT: ret float [[BIN_EXTRA5]] ; entry: @@ -1560,10 +1278,6 @@ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; CHECK-NEXT: [[R1:%.*]] = add i32 [[ARG]], undef -; CHECK-NEXT: [[R2:%.*]] = add i32 [[R1]], undef -; CHECK-NEXT: [[R3:%.*]] = add i32 [[R2]], undef -; CHECK-NEXT: [[R4:%.*]] = add i32 [[R3]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -1571,7 +1285,6 @@ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; CHECK-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP12]], [[ARG]] ; CHECK-NEXT: [[BIN_EXTRA3:%.*]] = add i32 [[BIN_EXTRA]], [[TMP9]] -; CHECK-NEXT: [[R5:%.*]] = add i32 [[R4]], undef ; CHECK-NEXT: ret i32 [[BIN_EXTRA3]] ; ; THRESHOLD-LABEL: @wobble( @@ -1588,10 +1301,6 @@ ; THRESHOLD-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3 ; THRESHOLD-NEXT: [[TMP10:%.*]] = icmp eq <4 x i32> [[TMP8]], zeroinitializer ; THRESHOLD-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32> -; THRESHOLD-NEXT: [[R1:%.*]] = add i32 [[ARG]], undef -; THRESHOLD-NEXT: [[R2:%.*]] = add i32 [[R1]], undef -; THRESHOLD-NEXT: [[R3:%.*]] = add i32 [[R2]], undef -; THRESHOLD-NEXT: [[R4:%.*]] = add i32 [[R3]], undef ; THRESHOLD-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> ; THRESHOLD-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[RDX_SHUF]] ; THRESHOLD-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> @@ -1599,7 +1308,6 @@ ; THRESHOLD-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0 ; THRESHOLD-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP12]], [[ARG]] ; THRESHOLD-NEXT: [[BIN_EXTRA3:%.*]] = add i32 [[BIN_EXTRA]], [[TMP9]] -; THRESHOLD-NEXT: [[R5:%.*]] = add i32 [[R4]], undef ; THRESHOLD-NEXT: ret i32 [[BIN_EXTRA3]] ; bb: Index: test/Transforms/SLPVectorizer/X86/reduction_loads.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/reduction_loads.ll +++ test/Transforms/SLPVectorizer/X86/reduction_loads.ll @@ -5,35 +5,28 @@ define i32 @test(i32* nocapture readonly %p) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* %p, i64 1 -; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* %p, i64 2 -; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* %p, i64 3 -; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* %p, i64 4 -; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* %p, i64 5 -; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* %p, i64 6 -; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* %p, i64 7 -; CHECK-NEXT: br label %for.body +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1 +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 2 +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 3 +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 4 +; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 5 +; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 6 +; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, %entry ], [ %add.7, %for.body ] -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* %p to <8 x i32>* +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>* ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> , [[TMP1]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]] -; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = add i32 undef, [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = add i32 undef, [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = add i32 undef, [[ADD_4]] -; CHECK-NEXT: [[ADD_6:%.*]] = add i32 undef, [[ADD_5]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP2]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]] ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 -; CHECK-NEXT: [[ADD_7:%.*]] = add i32 [[TMP4]], [[SUM]] -; CHECK-NEXT: br i1 true, label %for.end, label %for.body +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0 +; CHECK-NEXT: [[ADD_7]] = add i32 [[TMP3]], [[SUM]] +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret i32 [[ADD_7]] ;