diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -8027,10 +8027,6 @@ return false; } -static bool PhiTypeSorterFunc(Value *V, Value *V2) { - return V->getType() < V2->getType(); -} - /// Try and get a reduction value from a phi node. /// /// Given a phi node \p P in a block \p ParentBB, consider possible reductions @@ -8284,6 +8280,10 @@ bool Changed = false; SmallVector Incoming; SmallPtrSet VisitedInstrs; + // Maps phi nodes to the non-phi nodes found in the use tree for each phi + // node. This helps identify the chains that can be vectorized in a better + // way. + DenseMap> PHIToOpcodes; bool HaveVectorizedPhiNodes = true; while (HaveVectorizedPhiNodes) { @@ -8296,22 +8296,113 @@ if (!P) break; - if (!VisitedInstrs.count(P) && !R.isDeleted(P)) + // No need to analyze deleted, vectorized and non-vectorizable + // instructions. + if (!VisitedInstrs.count(P) && !R.isDeleted(P) && + !P->getType()->isVectorTy()) Incoming.push_back(P); } - // Sort by type. - llvm::stable_sort(Incoming, PhiTypeSorterFunc); + // Find the corresponding non-phi nodes for better matching when trying to + // build the tree. + for (Value *V : Incoming) { + SmallVectorImpl &Opcodes = + PHIToOpcodes.try_emplace(V).first->getSecond(); + if (!Opcodes.empty()) + continue; + SmallVector Nodes(1, V); + SmallPtrSet Visited; + while (!Nodes.empty()) { + auto *PHI = cast(Nodes.pop_back_val()); + if (!Visited.insert(PHI).second) + continue; + for (Value *V : PHI->incoming_values()) { + if (auto *PHI1 = dyn_cast((V))) { + Nodes.push_back(PHI1); + continue; + } + Opcodes.emplace_back(V); + } + } + } + + // Sort by type, parent, operands. 
+ stable_sort(Incoming, [&PHIToOpcodes](Value *V1, Value *V2) { + if (V1->getType() < V2->getType()) + return true; + if (V1->getType() > V2->getType()) + return false; + ArrayRef Opcodes1 = PHIToOpcodes[V1]; + ArrayRef Opcodes2 = PHIToOpcodes[V2]; + if (Opcodes1.size() < Opcodes2.size()) + return true; + if (Opcodes1.size() > Opcodes2.size()) + return false; + for (int I = 0, E = Opcodes1.size(); I < E; ++I) { + // Undefs are compatible with any other value. + if (isa(Opcodes1[I]) || isa(Opcodes2[I])) + continue; + if (auto *I1 = dyn_cast(Opcodes1[I])) + if (auto *I2 = dyn_cast(Opcodes2[I])) { + if (I1->getParent() < I2->getParent()) + return true; + if (I1->getParent() > I2->getParent()) + return false; + InstructionsState S = getSameOpcode({I1, I2}); + if (S.getOpcode()) + continue; + return I1->getOpcode() < I2->getOpcode(); + } + if (isa(Opcodes1[I]) && isa(Opcodes2[I])) + continue; + if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID()) + return true; + if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID()) + return false; + } + return false; + }); + + auto &&AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) { + if (V1 == V2) + return true; + if (V1->getType() != V2->getType()) + return false; + ArrayRef Opcodes1 = PHIToOpcodes[V1]; + ArrayRef Opcodes2 = PHIToOpcodes[V2]; + if (Opcodes1.size() != Opcodes2.size()) + return false; + for (int I = 0, E = Opcodes1.size(); I < E; ++I) { + // Undefs are compatible with any other value. + if (isa(Opcodes1[I]) || isa(Opcodes2[I])) + continue; + if (auto *I1 = dyn_cast(Opcodes1[I])) + if (auto *I2 = dyn_cast(Opcodes2[I])) { + if (I1->getParent() != I2->getParent()) + return false; + InstructionsState S = getSameOpcode({I1, I2}); + if (!S.getOpcode()) + return false; + continue; + } + if (isa(Opcodes1[I]) && isa(Opcodes2[I])) + continue; + if (Opcodes1[I]->getValueID() != Opcodes2[I]->getValueID()) + return false; + } + return true; + }; // Try to vectorize elements base on their type. 
+ SmallVector Candidates; for (SmallVector::iterator IncIt = Incoming.begin(), E = Incoming.end(); IncIt != E;) { - // Look for the next elements with the same type. + // Look for the next elements with the same type, parent and operand + // kinds. SmallVector::iterator SameTypeIt = IncIt; - while (SameTypeIt != E && - (*SameTypeIt)->getType() == (*IncIt)->getType()) { + while (SameTypeIt != E && AreCompatiblePHIs(*SameTypeIt, *IncIt)) { VisitedInstrs.insert(*SameTypeIt); ++SameTypeIt; } @@ -8329,7 +8420,20 @@ // Success start over because instructions might have been changed. HaveVectorizedPhiNodes = true; Changed = true; - break; + } else if (NumElts < 4 && + (Candidates.empty() || + Candidates.front()->getType() == (*IncIt)->getType())) { + Candidates.append(IncIt, std::next(IncIt, NumElts)); + } + // Final attempt to vectorize phis with the same types. + if (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType()) { + if (Candidates.size() > 1 && + tryToVectorizeList(Candidates, R, /*AllowReorder=*/true)) { + // Success start over because instructions might have been changed. + HaveVectorizedPhiNodes = true; + Changed = true; + } + Candidates.clear(); } // Start over at the next instruction of a different type (or the end). 
diff --git a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll --- a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll @@ -132,77 +132,58 @@ ; MAX256-NEXT: bb: ; MAX256-NEXT: br label [[BB1:%.*]] ; MAX256: bb1: -; MAX256-NEXT: [[TMP0:%.*]] = insertelement <4 x half> poison, half [[HVAL:%.*]], i32 0 -; MAX256-NEXT: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[HVAL]], i32 1 -; MAX256-NEXT: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[HVAL]], i32 2 -; MAX256-NEXT: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[HVAL]], i32 3 -; MAX256-NEXT: [[TMP4:%.*]] = fpext <4 x half> [[TMP3]] to <4 x float> -; MAX256-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP5:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 -; MAX256-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[FVAL]], i32 2 -; MAX256-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP7]], float [[FVAL]], i32 3 -; MAX256-NEXT: [[TMP9:%.*]] = insertelement <8 x float> [[TMP8]], float [[FVAL]], i32 4 -; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> [[TMP9]], float [[FVAL]], i32 5 -; MAX256-NEXT: [[TMP11:%.*]] = insertelement <8 x float> [[TMP10]], float [[FVAL]], i32 6 -; MAX256-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[FVAL]], i32 7 -; MAX256-NEXT: [[TMP13:%.*]] = fmul <8 x float> [[SHUFFLE]], [[TMP12]] -; MAX256-NEXT: [[TMP14:%.*]] = fadd <8 x float> zeroinitializer, [[TMP13]] -; MAX256-NEXT: [[TMP15:%.*]] = extractelement <8 x float> [[SHUFFLE]], i32 3 -; MAX256-NEXT: [[TMP16:%.*]] = extractelement <8 x float> [[SHUFFLE]], i32 2 -; MAX256-NEXT: [[TMP17:%.*]] = extractelement <8 x float> [[SHUFFLE]], i32 1 -; MAX256-NEXT: [[TMP18:%.*]] = 
extractelement <8 x float> [[SHUFFLE]], i32 0 -; MAX256-NEXT: [[TMP19:%.*]] = insertelement <8 x float> poison, float [[TMP15]], i32 0 -; MAX256-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP19]], float [[TMP16]], i32 1 -; MAX256-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[TMP17]], i32 2 -; MAX256-NEXT: [[TMP22:%.*]] = insertelement <8 x float> [[TMP21]], float [[TMP18]], i32 3 -; MAX256-NEXT: [[TMP23:%.*]] = insertelement <8 x float> [[TMP22]], float [[TMP15]], i32 4 -; MAX256-NEXT: [[TMP24:%.*]] = insertelement <8 x float> [[TMP23]], float [[TMP16]], i32 5 -; MAX256-NEXT: [[TMP25:%.*]] = insertelement <8 x float> [[TMP24]], float [[TMP17]], i32 6 -; MAX256-NEXT: [[TMP26:%.*]] = insertelement <8 x float> [[TMP25]], float [[TMP18]], i32 7 -; MAX256-NEXT: [[TMP27:%.*]] = fmul <8 x float> [[TMP26]], [[TMP12]] -; MAX256-NEXT: [[TMP28:%.*]] = fadd <8 x float> zeroinitializer, [[TMP27]] -; MAX256-NEXT: [[TMP29:%.*]] = fmul <8 x float> [[TMP26]], [[TMP12]] -; MAX256-NEXT: [[TMP30:%.*]] = fadd <8 x float> zeroinitializer, [[TMP29]] -; MAX256-NEXT: [[TMP31:%.*]] = fmul <8 x float> [[TMP26]], [[TMP12]] -; MAX256-NEXT: [[TMP32:%.*]] = fadd <8 x float> zeroinitializer, [[TMP31]] -; MAX256-NEXT: [[TMP33:%.*]] = insertelement <8 x float> poison, float [[FVAL]], i32 2 -; MAX256-NEXT: [[TMP34:%.*]] = extractelement <8 x float> [[TMP14]], i32 0 -; MAX256-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP33]], float [[TMP34]], i32 0 -; MAX256-NEXT: [[TMP36:%.*]] = extractelement <8 x float> [[TMP14]], i32 1 -; MAX256-NEXT: [[TMP37:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP36]], i32 1 -; MAX256-NEXT: [[TMP38:%.*]] = extractelement <8 x float> [[TMP14]], i32 4 -; MAX256-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP38]], i32 3 -; MAX256-NEXT: [[TMP40:%.*]] = extractelement <8 x float> [[TMP14]], i32 5 -; MAX256-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP39]], float [[TMP40]], i32 4 -; MAX256-NEXT: 
[[SHUFFLE3:%.*]] = shufflevector <8 x float> [[TMP41]], <8 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP42:%.*]] = extractelement <8 x float> [[TMP28]], i32 2 -; MAX256-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP42]], i32 1 -; MAX256-NEXT: [[TMP44:%.*]] = extractelement <8 x float> [[TMP28]], i32 3 -; MAX256-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP44]], i32 2 -; MAX256-NEXT: [[TMP46:%.*]] = extractelement <8 x float> [[TMP28]], i32 6 -; MAX256-NEXT: [[TMP47:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP46]], i32 3 -; MAX256-NEXT: [[TMP48:%.*]] = extractelement <8 x float> [[TMP28]], i32 7 -; MAX256-NEXT: [[TMP49:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP48]], i32 4 -; MAX256-NEXT: [[SHUFFLE6:%.*]] = shufflevector <8 x float> [[TMP49]], <8 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP50:%.*]] = extractelement <8 x float> [[TMP30]], i32 2 -; MAX256-NEXT: [[TMP51:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP50]], i32 1 -; MAX256-NEXT: [[TMP52:%.*]] = extractelement <8 x float> [[TMP30]], i32 3 -; MAX256-NEXT: [[TMP53:%.*]] = insertelement <8 x float> [[TMP51]], float [[TMP52]], i32 2 -; MAX256-NEXT: [[TMP54:%.*]] = extractelement <8 x float> [[TMP30]], i32 6 -; MAX256-NEXT: [[TMP55:%.*]] = insertelement <8 x float> [[TMP53]], float [[TMP54]], i32 3 -; MAX256-NEXT: [[TMP56:%.*]] = extractelement <8 x float> [[TMP30]], i32 7 -; MAX256-NEXT: [[TMP57:%.*]] = insertelement <8 x float> [[TMP55]], float [[TMP56]], i32 4 -; MAX256-NEXT: [[SHUFFLE9:%.*]] = shufflevector <8 x float> [[TMP57]], <8 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP58:%.*]] = extractelement <8 x float> [[TMP32]], i32 2 -; MAX256-NEXT: [[TMP59:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP58]], i32 1 -; MAX256-NEXT: [[TMP60:%.*]] = extractelement <8 x float> [[TMP32]], i32 3 -; MAX256-NEXT: [[TMP61:%.*]] = insertelement <8 x float> [[TMP59]], float [[TMP60]], i32 2 -; MAX256-NEXT: [[TMP62:%.*]] = 
extractelement <8 x float> [[TMP32]], i32 6 -; MAX256-NEXT: [[TMP63:%.*]] = insertelement <8 x float> [[TMP61]], float [[TMP62]], i32 3 -; MAX256-NEXT: [[TMP64:%.*]] = extractelement <8 x float> [[TMP32]], i32 7 -; MAX256-NEXT: [[TMP65:%.*]] = insertelement <8 x float> [[TMP63]], float [[TMP64]], i32 4 -; MAX256-NEXT: [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP65]], <8 x float> poison, <8 x i32> +; MAX256-NEXT: [[I:%.*]] = fpext half [[HVAL:%.*]] to float +; MAX256-NEXT: [[I3:%.*]] = fpext half [[HVAL]] to float +; MAX256-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float +; MAX256-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float +; MAX256-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0 +; MAX256-NEXT: [[TMP1:%.*]] = insertelement <8 x float> [[TMP0]], float [[I]], i32 1 +; MAX256-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I]], i32 2 +; MAX256-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I]], i32 3 +; MAX256-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I]], i32 4 +; MAX256-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I]], i32 5 +; MAX256-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I]], i32 6 +; MAX256-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I]], i32 7 +; MAX256-NEXT: [[TMP8:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 +; MAX256-NEXT: [[TMP9:%.*]] = insertelement <8 x float> [[TMP8]], float [[FVAL]], i32 1 +; MAX256-NEXT: [[TMP10:%.*]] = insertelement <8 x float> [[TMP9]], float [[FVAL]], i32 2 +; MAX256-NEXT: [[TMP11:%.*]] = insertelement <8 x float> [[TMP10]], float [[FVAL]], i32 3 +; MAX256-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[FVAL]], i32 4 +; MAX256-NEXT: [[TMP13:%.*]] = insertelement <8 x float> [[TMP12]], float [[FVAL]], i32 5 +; MAX256-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[FVAL]], i32 6 +; MAX256-NEXT: [[TMP15:%.*]] = 
insertelement <8 x float> [[TMP14]], float [[FVAL]], i32 7 +; MAX256-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP7]], [[TMP15]] +; MAX256-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]] +; MAX256-NEXT: [[TMP18:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 +; MAX256-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP18]], float [[I3]], i32 1 +; MAX256-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP19]], float [[I3]], i32 2 +; MAX256-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I3]], i32 3 +; MAX256-NEXT: [[TMP22:%.*]] = insertelement <8 x float> [[TMP21]], float [[I3]], i32 4 +; MAX256-NEXT: [[TMP23:%.*]] = insertelement <8 x float> [[TMP22]], float [[I3]], i32 5 +; MAX256-NEXT: [[TMP24:%.*]] = insertelement <8 x float> [[TMP23]], float [[I3]], i32 6 +; MAX256-NEXT: [[TMP25:%.*]] = insertelement <8 x float> [[TMP24]], float [[I3]], i32 7 +; MAX256-NEXT: [[TMP26:%.*]] = fmul <8 x float> [[TMP25]], [[TMP15]] +; MAX256-NEXT: [[TMP27:%.*]] = fadd <8 x float> zeroinitializer, [[TMP26]] +; MAX256-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 +; MAX256-NEXT: [[TMP29:%.*]] = insertelement <8 x float> [[TMP28]], float [[I6]], i32 1 +; MAX256-NEXT: [[TMP30:%.*]] = insertelement <8 x float> [[TMP29]], float [[I6]], i32 2 +; MAX256-NEXT: [[TMP31:%.*]] = insertelement <8 x float> [[TMP30]], float [[I6]], i32 3 +; MAX256-NEXT: [[TMP32:%.*]] = insertelement <8 x float> [[TMP31]], float [[I6]], i32 4 +; MAX256-NEXT: [[TMP33:%.*]] = insertelement <8 x float> [[TMP32]], float [[I6]], i32 5 +; MAX256-NEXT: [[TMP34:%.*]] = insertelement <8 x float> [[TMP33]], float [[I6]], i32 6 +; MAX256-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[I6]], i32 7 +; MAX256-NEXT: [[TMP36:%.*]] = fmul <8 x float> [[TMP35]], [[TMP15]] +; MAX256-NEXT: [[TMP37:%.*]] = fadd <8 x float> zeroinitializer, [[TMP36]] +; MAX256-NEXT: [[TMP38:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 
+; MAX256-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP38]], float [[I9]], i32 1 +; MAX256-NEXT: [[TMP40:%.*]] = insertelement <8 x float> [[TMP39]], float [[I9]], i32 2 +; MAX256-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[I9]], i32 3 +; MAX256-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[I9]], i32 4 +; MAX256-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[I9]], i32 5 +; MAX256-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[I9]], i32 6 +; MAX256-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[I9]], i32 7 +; MAX256-NEXT: [[TMP46:%.*]] = fmul <8 x float> [[TMP45]], [[TMP15]] +; MAX256-NEXT: [[TMP47:%.*]] = fadd <8 x float> zeroinitializer, [[TMP46]] ; MAX256-NEXT: switch i32 undef, label [[BB5:%.*]] [ ; MAX256-NEXT: i32 0, label [[BB2:%.*]] ; MAX256-NEXT: i32 1, label [[BB3:%.*]] @@ -211,154 +192,74 @@ ; MAX256: bb3: ; MAX256-NEXT: br label [[BB2]] ; MAX256: bb4: -; MAX256-NEXT: [[TMP66:%.*]] = insertelement <8 x float> poison, float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP67:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP34]], i32 0 -; MAX256-NEXT: [[TMP68:%.*]] = extractelement <8 x float> [[TMP14]], i32 3 -; MAX256-NEXT: [[TMP69:%.*]] = insertelement <8 x float> [[TMP67]], float [[TMP68]], i32 2 -; MAX256-NEXT: [[TMP70:%.*]] = insertelement <8 x float> [[TMP69]], float [[TMP38]], i32 3 -; MAX256-NEXT: [[TMP71:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 -; MAX256-NEXT: [[TMP72:%.*]] = insertelement <8 x float> [[TMP70]], float [[TMP71]], i32 4 -; MAX256-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x float> [[TMP72]], <8 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP73:%.*]] = extractelement <8 x float> [[TMP28]], i32 0 -; MAX256-NEXT: [[TMP74:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP73]], i32 0 -; MAX256-NEXT: [[TMP75:%.*]] = insertelement <8 x float> [[TMP74]], float [[TMP44]], i32 2 -; MAX256-NEXT: [[TMP76:%.*]] = 
extractelement <8 x float> [[TMP28]], i32 4 -; MAX256-NEXT: [[TMP77:%.*]] = insertelement <8 x float> [[TMP75]], float [[TMP76]], i32 3 -; MAX256-NEXT: [[TMP78:%.*]] = insertelement <8 x float> [[TMP77]], float [[TMP48]], i32 4 -; MAX256-NEXT: [[SHUFFLE4:%.*]] = shufflevector <8 x float> [[TMP78]], <8 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP79:%.*]] = extractelement <8 x float> [[TMP30]], i32 0 -; MAX256-NEXT: [[TMP80:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP79]], i32 0 -; MAX256-NEXT: [[TMP81:%.*]] = insertelement <8 x float> [[TMP80]], float [[TMP52]], i32 2 -; MAX256-NEXT: [[TMP82:%.*]] = extractelement <8 x float> [[TMP30]], i32 4 -; MAX256-NEXT: [[TMP83:%.*]] = insertelement <8 x float> [[TMP81]], float [[TMP82]], i32 3 -; MAX256-NEXT: [[TMP84:%.*]] = insertelement <8 x float> [[TMP83]], float [[TMP56]], i32 4 -; MAX256-NEXT: [[SHUFFLE7:%.*]] = shufflevector <8 x float> [[TMP84]], <8 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP85:%.*]] = extractelement <8 x float> [[TMP32]], i32 0 -; MAX256-NEXT: [[TMP86:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP85]], i32 0 -; MAX256-NEXT: [[TMP87:%.*]] = insertelement <8 x float> [[TMP86]], float [[TMP60]], i32 2 -; MAX256-NEXT: [[TMP88:%.*]] = extractelement <8 x float> [[TMP32]], i32 4 -; MAX256-NEXT: [[TMP89:%.*]] = insertelement <8 x float> [[TMP87]], float [[TMP88]], i32 3 -; MAX256-NEXT: [[TMP90:%.*]] = insertelement <8 x float> [[TMP89]], float [[TMP64]], i32 4 -; MAX256-NEXT: [[SHUFFLE10:%.*]] = shufflevector <8 x float> [[TMP90]], <8 x float> poison, <8 x i32> ; MAX256-NEXT: br label [[BB2]] ; MAX256: bb5: -; MAX256-NEXT: [[TMP91:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP36]], i32 1 -; MAX256-NEXT: [[TMP92:%.*]] = extractelement <8 x float> [[TMP14]], i32 3 -; MAX256-NEXT: [[TMP93:%.*]] = insertelement <8 x float> [[TMP91]], float [[TMP92]], i32 2 -; MAX256-NEXT: [[TMP94:%.*]] = insertelement <8 x float> [[TMP93]], float [[TMP40]], i32 3 -; MAX256-NEXT: 
[[TMP95:%.*]] = extractelement <8 x float> [[TMP14]], i32 7 -; MAX256-NEXT: [[TMP96:%.*]] = insertelement <8 x float> [[TMP94]], float [[TMP95]], i32 4 -; MAX256-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x float> [[TMP96]], <8 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP97:%.*]] = insertelement <8 x float> poison, float [[FVAL]], i32 1 -; MAX256-NEXT: [[TMP98:%.*]] = extractelement <8 x float> [[TMP28]], i32 0 -; MAX256-NEXT: [[TMP99:%.*]] = insertelement <8 x float> [[TMP97]], float [[TMP98]], i32 0 -; MAX256-NEXT: [[TMP100:%.*]] = insertelement <8 x float> [[TMP99]], float [[TMP42]], i32 2 -; MAX256-NEXT: [[TMP101:%.*]] = extractelement <8 x float> [[TMP28]], i32 4 -; MAX256-NEXT: [[TMP102:%.*]] = insertelement <8 x float> [[TMP100]], float [[TMP101]], i32 3 -; MAX256-NEXT: [[TMP103:%.*]] = insertelement <8 x float> [[TMP102]], float [[TMP46]], i32 4 -; MAX256-NEXT: [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP103]], <8 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP104:%.*]] = extractelement <8 x float> [[TMP30]], i32 0 -; MAX256-NEXT: [[TMP105:%.*]] = insertelement <8 x float> [[TMP97]], float [[TMP104]], i32 0 -; MAX256-NEXT: [[TMP106:%.*]] = insertelement <8 x float> [[TMP105]], float [[TMP50]], i32 2 -; MAX256-NEXT: [[TMP107:%.*]] = extractelement <8 x float> [[TMP30]], i32 4 -; MAX256-NEXT: [[TMP108:%.*]] = insertelement <8 x float> [[TMP106]], float [[TMP107]], i32 3 -; MAX256-NEXT: [[TMP109:%.*]] = insertelement <8 x float> [[TMP108]], float [[TMP54]], i32 4 -; MAX256-NEXT: [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP109]], <8 x float> poison, <8 x i32> -; MAX256-NEXT: [[TMP110:%.*]] = extractelement <8 x float> [[TMP32]], i32 0 -; MAX256-NEXT: [[TMP111:%.*]] = insertelement <8 x float> [[TMP97]], float [[TMP110]], i32 0 -; MAX256-NEXT: [[TMP112:%.*]] = insertelement <8 x float> [[TMP111]], float [[TMP58]], i32 2 -; MAX256-NEXT: [[TMP113:%.*]] = extractelement <8 x float> [[TMP32]], i32 4 -; MAX256-NEXT: [[TMP114:%.*]] = insertelement <8 x 
float> [[TMP112]], float [[TMP113]], i32 3 -; MAX256-NEXT: [[TMP115:%.*]] = insertelement <8 x float> [[TMP114]], float [[TMP62]], i32 4 -; MAX256-NEXT: [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP115]], <8 x float> poison, <8 x i32> ; MAX256-NEXT: br label [[BB2]] ; MAX256: bb2: -; MAX256-NEXT: [[TMP116:%.*]] = phi <8 x float> [ [[TMP14]], [[BB3]] ], [ [[SHUFFLE1]], [[BB4]] ], [ [[SHUFFLE2]], [[BB5]] ], [ [[SHUFFLE3]], [[BB1]] ] -; MAX256-NEXT: [[TMP117:%.*]] = phi <8 x float> [ [[TMP28]], [[BB3]] ], [ [[SHUFFLE4]], [[BB4]] ], [ [[SHUFFLE5]], [[BB5]] ], [ [[SHUFFLE6]], [[BB1]] ] -; MAX256-NEXT: [[TMP118:%.*]] = phi <8 x float> [ [[TMP30]], [[BB3]] ], [ [[SHUFFLE7]], [[BB4]] ], [ [[SHUFFLE8]], [[BB5]] ], [ [[SHUFFLE9]], [[BB1]] ] -; MAX256-NEXT: [[TMP119:%.*]] = phi <8 x float> [ [[TMP32]], [[BB3]] ], [ [[SHUFFLE10]], [[BB4]] ], [ [[SHUFFLE11]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ] -; MAX256-NEXT: [[TMP120:%.*]] = extractelement <8 x float> [[TMP119]], i32 6 -; MAX256-NEXT: store float [[TMP120]], float* undef, align 4 +; MAX256-NEXT: [[TMP48:%.*]] = phi <8 x float> [ [[TMP27]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP15]], [[BB1]] ] +; MAX256-NEXT: [[TMP49:%.*]] = phi <8 x float> [ [[TMP37]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP37]], [[BB5]] ], [ [[TMP37]], [[BB1]] ] +; MAX256-NEXT: [[TMP50:%.*]] = phi <8 x float> [ [[TMP47]], [[BB3]] ], [ [[TMP47]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP47]], [[BB1]] ] +; MAX256-NEXT: [[TMP51:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP17]], [[BB5]] ], [ [[TMP15]], [[BB1]] ] +; MAX256-NEXT: [[TMP52:%.*]] = extractelement <8 x float> [[TMP49]], i32 7 +; MAX256-NEXT: store float [[TMP52]], float* undef, align 4 ; MAX256-NEXT: ret void ; ; MAX1024-LABEL: @phi_float32( ; MAX1024-NEXT: bb: ; MAX1024-NEXT: br label [[BB1:%.*]] ; MAX1024: bb1: -; MAX1024-NEXT: [[TMP0:%.*]] = insertelement <4 x half> poison, half [[HVAL:%.*]], i32 0 -; MAX1024-NEXT: 
[[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[HVAL]], i32 1 -; MAX1024-NEXT: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[HVAL]], i32 2 -; MAX1024-NEXT: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[HVAL]], i32 3 -; MAX1024-NEXT: [[TMP4:%.*]] = fpext <4 x half> [[TMP3]] to <4 x float> -; MAX1024-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> poison, <32 x i32> -; MAX1024-NEXT: [[TMP5:%.*]] = insertelement <32 x float> poison, float [[FVAL:%.*]], i32 0 -; MAX1024-NEXT: [[TMP6:%.*]] = insertelement <32 x float> [[TMP5]], float [[FVAL]], i32 1 -; MAX1024-NEXT: [[TMP7:%.*]] = insertelement <32 x float> [[TMP6]], float [[FVAL]], i32 2 -; MAX1024-NEXT: [[TMP8:%.*]] = insertelement <32 x float> [[TMP7]], float [[FVAL]], i32 3 -; MAX1024-NEXT: [[TMP9:%.*]] = insertelement <32 x float> [[TMP8]], float [[FVAL]], i32 4 -; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <32 x float> [[TMP9]], float [[FVAL]], i32 5 -; MAX1024-NEXT: [[TMP11:%.*]] = insertelement <32 x float> [[TMP10]], float [[FVAL]], i32 6 -; MAX1024-NEXT: [[TMP12:%.*]] = insertelement <32 x float> [[TMP11]], float [[FVAL]], i32 7 -; MAX1024-NEXT: [[TMP13:%.*]] = insertelement <32 x float> [[TMP12]], float [[FVAL]], i32 8 -; MAX1024-NEXT: [[TMP14:%.*]] = insertelement <32 x float> [[TMP13]], float [[FVAL]], i32 9 -; MAX1024-NEXT: [[TMP15:%.*]] = insertelement <32 x float> [[TMP14]], float [[FVAL]], i32 10 -; MAX1024-NEXT: [[TMP16:%.*]] = insertelement <32 x float> [[TMP15]], float [[FVAL]], i32 11 -; MAX1024-NEXT: [[TMP17:%.*]] = insertelement <32 x float> [[TMP16]], float [[FVAL]], i32 12 -; MAX1024-NEXT: [[TMP18:%.*]] = insertelement <32 x float> [[TMP17]], float [[FVAL]], i32 13 -; MAX1024-NEXT: [[TMP19:%.*]] = insertelement <32 x float> [[TMP18]], float [[FVAL]], i32 14 -; MAX1024-NEXT: [[TMP20:%.*]] = insertelement <32 x float> [[TMP19]], float [[FVAL]], i32 15 -; MAX1024-NEXT: [[TMP21:%.*]] = insertelement <32 x float> [[TMP20]], float [[FVAL]], i32 
16 -; MAX1024-NEXT: [[TMP22:%.*]] = insertelement <32 x float> [[TMP21]], float [[FVAL]], i32 17 -; MAX1024-NEXT: [[TMP23:%.*]] = insertelement <32 x float> [[TMP22]], float [[FVAL]], i32 18 -; MAX1024-NEXT: [[TMP24:%.*]] = insertelement <32 x float> [[TMP23]], float [[FVAL]], i32 19 -; MAX1024-NEXT: [[TMP25:%.*]] = insertelement <32 x float> [[TMP24]], float [[FVAL]], i32 20 -; MAX1024-NEXT: [[TMP26:%.*]] = insertelement <32 x float> [[TMP25]], float [[FVAL]], i32 21 -; MAX1024-NEXT: [[TMP27:%.*]] = insertelement <32 x float> [[TMP26]], float [[FVAL]], i32 22 -; MAX1024-NEXT: [[TMP28:%.*]] = insertelement <32 x float> [[TMP27]], float [[FVAL]], i32 23 -; MAX1024-NEXT: [[TMP29:%.*]] = insertelement <32 x float> [[TMP28]], float [[FVAL]], i32 24 -; MAX1024-NEXT: [[TMP30:%.*]] = insertelement <32 x float> [[TMP29]], float [[FVAL]], i32 25 -; MAX1024-NEXT: [[TMP31:%.*]] = insertelement <32 x float> [[TMP30]], float [[FVAL]], i32 26 -; MAX1024-NEXT: [[TMP32:%.*]] = insertelement <32 x float> [[TMP31]], float [[FVAL]], i32 27 -; MAX1024-NEXT: [[TMP33:%.*]] = insertelement <32 x float> [[TMP32]], float [[FVAL]], i32 28 -; MAX1024-NEXT: [[TMP34:%.*]] = insertelement <32 x float> [[TMP33]], float [[FVAL]], i32 29 -; MAX1024-NEXT: [[TMP35:%.*]] = insertelement <32 x float> [[TMP34]], float [[FVAL]], i32 30 -; MAX1024-NEXT: [[TMP36:%.*]] = insertelement <32 x float> [[TMP35]], float [[FVAL]], i32 31 -; MAX1024-NEXT: [[TMP37:%.*]] = fmul <32 x float> [[SHUFFLE]], [[TMP36]] -; MAX1024-NEXT: [[TMP38:%.*]] = fadd <32 x float> zeroinitializer, [[TMP37]] -; MAX1024-NEXT: [[TMP39:%.*]] = insertelement <32 x float> poison, float [[FVAL]], i32 2 -; MAX1024-NEXT: [[TMP40:%.*]] = extractelement <32 x float> [[TMP38]], i32 0 -; MAX1024-NEXT: [[TMP41:%.*]] = insertelement <32 x float> [[TMP39]], float [[TMP40]], i32 0 -; MAX1024-NEXT: [[TMP42:%.*]] = extractelement <32 x float> [[TMP38]], i32 1 -; MAX1024-NEXT: [[TMP43:%.*]] = insertelement <32 x float> [[TMP41]], float [[TMP42]], i32 1 
-; MAX1024-NEXT: [[TMP44:%.*]] = extractelement <32 x float> [[TMP38]], i32 4 -; MAX1024-NEXT: [[TMP45:%.*]] = insertelement <32 x float> [[TMP43]], float [[TMP44]], i32 3 -; MAX1024-NEXT: [[TMP46:%.*]] = extractelement <32 x float> [[TMP38]], i32 5 -; MAX1024-NEXT: [[TMP47:%.*]] = insertelement <32 x float> [[TMP45]], float [[TMP46]], i32 4 -; MAX1024-NEXT: [[TMP48:%.*]] = extractelement <32 x float> [[TMP38]], i32 10 -; MAX1024-NEXT: [[TMP49:%.*]] = insertelement <32 x float> [[TMP47]], float [[TMP48]], i32 5 -; MAX1024-NEXT: [[TMP50:%.*]] = extractelement <32 x float> [[TMP38]], i32 11 -; MAX1024-NEXT: [[TMP51:%.*]] = insertelement <32 x float> [[TMP49]], float [[TMP50]], i32 6 -; MAX1024-NEXT: [[TMP52:%.*]] = extractelement <32 x float> [[TMP38]], i32 14 -; MAX1024-NEXT: [[TMP53:%.*]] = insertelement <32 x float> [[TMP51]], float [[TMP52]], i32 7 -; MAX1024-NEXT: [[TMP54:%.*]] = extractelement <32 x float> [[TMP38]], i32 15 -; MAX1024-NEXT: [[TMP55:%.*]] = insertelement <32 x float> [[TMP53]], float [[TMP54]], i32 8 -; MAX1024-NEXT: [[TMP56:%.*]] = extractelement <32 x float> [[TMP38]], i32 18 -; MAX1024-NEXT: [[TMP57:%.*]] = insertelement <32 x float> [[TMP55]], float [[TMP56]], i32 9 -; MAX1024-NEXT: [[TMP58:%.*]] = extractelement <32 x float> [[TMP38]], i32 19 -; MAX1024-NEXT: [[TMP59:%.*]] = insertelement <32 x float> [[TMP57]], float [[TMP58]], i32 10 -; MAX1024-NEXT: [[TMP60:%.*]] = extractelement <32 x float> [[TMP38]], i32 22 -; MAX1024-NEXT: [[TMP61:%.*]] = insertelement <32 x float> [[TMP59]], float [[TMP60]], i32 11 -; MAX1024-NEXT: [[TMP62:%.*]] = extractelement <32 x float> [[TMP38]], i32 23 -; MAX1024-NEXT: [[TMP63:%.*]] = insertelement <32 x float> [[TMP61]], float [[TMP62]], i32 12 -; MAX1024-NEXT: [[TMP64:%.*]] = extractelement <32 x float> [[TMP38]], i32 26 -; MAX1024-NEXT: [[TMP65:%.*]] = insertelement <32 x float> [[TMP63]], float [[TMP64]], i32 13 -; MAX1024-NEXT: [[TMP66:%.*]] = extractelement <32 x float> [[TMP38]], i32 27 -; 
MAX1024-NEXT: [[TMP67:%.*]] = insertelement <32 x float> [[TMP65]], float [[TMP66]], i32 14 -; MAX1024-NEXT: [[TMP68:%.*]] = extractelement <32 x float> [[TMP38]], i32 30 -; MAX1024-NEXT: [[TMP69:%.*]] = insertelement <32 x float> [[TMP67]], float [[TMP68]], i32 15 -; MAX1024-NEXT: [[TMP70:%.*]] = extractelement <32 x float> [[TMP38]], i32 31 -; MAX1024-NEXT: [[TMP71:%.*]] = insertelement <32 x float> [[TMP69]], float [[TMP70]], i32 16 -; MAX1024-NEXT: [[SHUFFLE3:%.*]] = shufflevector <32 x float> [[TMP71]], <32 x float> poison, <32 x i32> +; MAX1024-NEXT: [[I:%.*]] = fpext half [[HVAL:%.*]] to float +; MAX1024-NEXT: [[I3:%.*]] = fpext half [[HVAL]] to float +; MAX1024-NEXT: [[I6:%.*]] = fpext half [[HVAL]] to float +; MAX1024-NEXT: [[I9:%.*]] = fpext half [[HVAL]] to float +; MAX1024-NEXT: [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0 +; MAX1024-NEXT: [[TMP1:%.*]] = insertelement <8 x float> [[TMP0]], float [[I]], i32 1 +; MAX1024-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I]], i32 2 +; MAX1024-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I]], i32 3 +; MAX1024-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I]], i32 4 +; MAX1024-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I]], i32 5 +; MAX1024-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I]], i32 6 +; MAX1024-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I]], i32 7 +; MAX1024-NEXT: [[TMP8:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0 +; MAX1024-NEXT: [[TMP9:%.*]] = insertelement <8 x float> [[TMP8]], float [[FVAL]], i32 1 +; MAX1024-NEXT: [[TMP10:%.*]] = insertelement <8 x float> [[TMP9]], float [[FVAL]], i32 2 +; MAX1024-NEXT: [[TMP11:%.*]] = insertelement <8 x float> [[TMP10]], float [[FVAL]], i32 3 +; MAX1024-NEXT: [[TMP12:%.*]] = insertelement <8 x float> [[TMP11]], float [[FVAL]], i32 4 +; MAX1024-NEXT: [[TMP13:%.*]] = insertelement <8 x float> 
[[TMP12]], float [[FVAL]], i32 5 +; MAX1024-NEXT: [[TMP14:%.*]] = insertelement <8 x float> [[TMP13]], float [[FVAL]], i32 6 +; MAX1024-NEXT: [[TMP15:%.*]] = insertelement <8 x float> [[TMP14]], float [[FVAL]], i32 7 +; MAX1024-NEXT: [[TMP16:%.*]] = fmul <8 x float> [[TMP7]], [[TMP15]] +; MAX1024-NEXT: [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]] +; MAX1024-NEXT: [[TMP18:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0 +; MAX1024-NEXT: [[TMP19:%.*]] = insertelement <8 x float> [[TMP18]], float [[I3]], i32 1 +; MAX1024-NEXT: [[TMP20:%.*]] = insertelement <8 x float> [[TMP19]], float [[I3]], i32 2 +; MAX1024-NEXT: [[TMP21:%.*]] = insertelement <8 x float> [[TMP20]], float [[I3]], i32 3 +; MAX1024-NEXT: [[TMP22:%.*]] = insertelement <8 x float> [[TMP21]], float [[I3]], i32 4 +; MAX1024-NEXT: [[TMP23:%.*]] = insertelement <8 x float> [[TMP22]], float [[I3]], i32 5 +; MAX1024-NEXT: [[TMP24:%.*]] = insertelement <8 x float> [[TMP23]], float [[I3]], i32 6 +; MAX1024-NEXT: [[TMP25:%.*]] = insertelement <8 x float> [[TMP24]], float [[I3]], i32 7 +; MAX1024-NEXT: [[TMP26:%.*]] = fmul <8 x float> [[TMP25]], [[TMP15]] +; MAX1024-NEXT: [[TMP27:%.*]] = fadd <8 x float> zeroinitializer, [[TMP26]] +; MAX1024-NEXT: [[TMP28:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0 +; MAX1024-NEXT: [[TMP29:%.*]] = insertelement <8 x float> [[TMP28]], float [[I6]], i32 1 +; MAX1024-NEXT: [[TMP30:%.*]] = insertelement <8 x float> [[TMP29]], float [[I6]], i32 2 +; MAX1024-NEXT: [[TMP31:%.*]] = insertelement <8 x float> [[TMP30]], float [[I6]], i32 3 +; MAX1024-NEXT: [[TMP32:%.*]] = insertelement <8 x float> [[TMP31]], float [[I6]], i32 4 +; MAX1024-NEXT: [[TMP33:%.*]] = insertelement <8 x float> [[TMP32]], float [[I6]], i32 5 +; MAX1024-NEXT: [[TMP34:%.*]] = insertelement <8 x float> [[TMP33]], float [[I6]], i32 6 +; MAX1024-NEXT: [[TMP35:%.*]] = insertelement <8 x float> [[TMP34]], float [[I6]], i32 7 +; MAX1024-NEXT: [[TMP36:%.*]] = fmul <8 x float> 
[[TMP35]], [[TMP15]] +; MAX1024-NEXT: [[TMP37:%.*]] = fadd <8 x float> zeroinitializer, [[TMP36]] +; MAX1024-NEXT: [[TMP38:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0 +; MAX1024-NEXT: [[TMP39:%.*]] = insertelement <8 x float> [[TMP38]], float [[I9]], i32 1 +; MAX1024-NEXT: [[TMP40:%.*]] = insertelement <8 x float> [[TMP39]], float [[I9]], i32 2 +; MAX1024-NEXT: [[TMP41:%.*]] = insertelement <8 x float> [[TMP40]], float [[I9]], i32 3 +; MAX1024-NEXT: [[TMP42:%.*]] = insertelement <8 x float> [[TMP41]], float [[I9]], i32 4 +; MAX1024-NEXT: [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[I9]], i32 5 +; MAX1024-NEXT: [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[I9]], i32 6 +; MAX1024-NEXT: [[TMP45:%.*]] = insertelement <8 x float> [[TMP44]], float [[I9]], i32 7 +; MAX1024-NEXT: [[TMP46:%.*]] = fmul <8 x float> [[TMP45]], [[TMP15]] +; MAX1024-NEXT: [[TMP47:%.*]] = fadd <8 x float> zeroinitializer, [[TMP46]] ; MAX1024-NEXT: switch i32 undef, label [[BB5:%.*]] [ ; MAX1024-NEXT: i32 0, label [[BB2:%.*]] ; MAX1024-NEXT: i32 1, label [[BB3:%.*]] @@ -367,64 +268,16 @@ ; MAX1024: bb3: ; MAX1024-NEXT: br label [[BB2]] ; MAX1024: bb4: -; MAX1024-NEXT: [[TMP72:%.*]] = insertelement <32 x float> poison, float [[FVAL]], i32 1 -; MAX1024-NEXT: [[TMP73:%.*]] = insertelement <32 x float> [[TMP72]], float [[TMP40]], i32 0 -; MAX1024-NEXT: [[TMP74:%.*]] = extractelement <32 x float> [[TMP38]], i32 3 -; MAX1024-NEXT: [[TMP75:%.*]] = insertelement <32 x float> [[TMP73]], float [[TMP74]], i32 2 -; MAX1024-NEXT: [[TMP76:%.*]] = insertelement <32 x float> [[TMP75]], float [[TMP44]], i32 3 -; MAX1024-NEXT: [[TMP77:%.*]] = extractelement <32 x float> [[TMP38]], i32 7 -; MAX1024-NEXT: [[TMP78:%.*]] = insertelement <32 x float> [[TMP76]], float [[TMP77]], i32 4 -; MAX1024-NEXT: [[TMP79:%.*]] = extractelement <32 x float> [[TMP38]], i32 8 -; MAX1024-NEXT: [[TMP80:%.*]] = insertelement <32 x float> [[TMP78]], float [[TMP79]], i32 5 -; MAX1024-NEXT: 
[[TMP81:%.*]] = insertelement <32 x float> [[TMP80]], float [[TMP50]], i32 6 -; MAX1024-NEXT: [[TMP82:%.*]] = extractelement <32 x float> [[TMP38]], i32 12 -; MAX1024-NEXT: [[TMP83:%.*]] = insertelement <32 x float> [[TMP81]], float [[TMP82]], i32 7 -; MAX1024-NEXT: [[TMP84:%.*]] = insertelement <32 x float> [[TMP83]], float [[TMP54]], i32 8 -; MAX1024-NEXT: [[TMP85:%.*]] = extractelement <32 x float> [[TMP38]], i32 16 -; MAX1024-NEXT: [[TMP86:%.*]] = insertelement <32 x float> [[TMP84]], float [[TMP85]], i32 9 -; MAX1024-NEXT: [[TMP87:%.*]] = insertelement <32 x float> [[TMP86]], float [[TMP58]], i32 10 -; MAX1024-NEXT: [[TMP88:%.*]] = extractelement <32 x float> [[TMP38]], i32 20 -; MAX1024-NEXT: [[TMP89:%.*]] = insertelement <32 x float> [[TMP87]], float [[TMP88]], i32 11 -; MAX1024-NEXT: [[TMP90:%.*]] = insertelement <32 x float> [[TMP89]], float [[TMP62]], i32 12 -; MAX1024-NEXT: [[TMP91:%.*]] = extractelement <32 x float> [[TMP38]], i32 24 -; MAX1024-NEXT: [[TMP92:%.*]] = insertelement <32 x float> [[TMP90]], float [[TMP91]], i32 13 -; MAX1024-NEXT: [[TMP93:%.*]] = insertelement <32 x float> [[TMP92]], float [[TMP66]], i32 14 -; MAX1024-NEXT: [[TMP94:%.*]] = extractelement <32 x float> [[TMP38]], i32 28 -; MAX1024-NEXT: [[TMP95:%.*]] = insertelement <32 x float> [[TMP93]], float [[TMP94]], i32 15 -; MAX1024-NEXT: [[TMP96:%.*]] = insertelement <32 x float> [[TMP95]], float [[TMP70]], i32 16 -; MAX1024-NEXT: [[SHUFFLE1:%.*]] = shufflevector <32 x float> [[TMP96]], <32 x float> poison, <32 x i32> ; MAX1024-NEXT: br label [[BB2]] ; MAX1024: bb5: -; MAX1024-NEXT: [[TMP97:%.*]] = insertelement <32 x float> [[TMP5]], float [[TMP42]], i32 1 -; MAX1024-NEXT: [[TMP98:%.*]] = extractelement <32 x float> [[TMP38]], i32 3 -; MAX1024-NEXT: [[TMP99:%.*]] = insertelement <32 x float> [[TMP97]], float [[TMP98]], i32 2 -; MAX1024-NEXT: [[TMP100:%.*]] = insertelement <32 x float> [[TMP99]], float [[TMP46]], i32 3 -; MAX1024-NEXT: [[TMP101:%.*]] = extractelement <32 x float> 
[[TMP38]], i32 7 -; MAX1024-NEXT: [[TMP102:%.*]] = insertelement <32 x float> [[TMP100]], float [[TMP101]], i32 4 -; MAX1024-NEXT: [[TMP103:%.*]] = extractelement <32 x float> [[TMP38]], i32 8 -; MAX1024-NEXT: [[TMP104:%.*]] = insertelement <32 x float> [[TMP102]], float [[TMP103]], i32 5 -; MAX1024-NEXT: [[TMP105:%.*]] = insertelement <32 x float> [[TMP104]], float [[TMP48]], i32 6 -; MAX1024-NEXT: [[TMP106:%.*]] = extractelement <32 x float> [[TMP38]], i32 12 -; MAX1024-NEXT: [[TMP107:%.*]] = insertelement <32 x float> [[TMP105]], float [[TMP106]], i32 7 -; MAX1024-NEXT: [[TMP108:%.*]] = insertelement <32 x float> [[TMP107]], float [[TMP52]], i32 8 -; MAX1024-NEXT: [[TMP109:%.*]] = extractelement <32 x float> [[TMP38]], i32 16 -; MAX1024-NEXT: [[TMP110:%.*]] = insertelement <32 x float> [[TMP108]], float [[TMP109]], i32 9 -; MAX1024-NEXT: [[TMP111:%.*]] = insertelement <32 x float> [[TMP110]], float [[TMP56]], i32 10 -; MAX1024-NEXT: [[TMP112:%.*]] = extractelement <32 x float> [[TMP38]], i32 20 -; MAX1024-NEXT: [[TMP113:%.*]] = insertelement <32 x float> [[TMP111]], float [[TMP112]], i32 11 -; MAX1024-NEXT: [[TMP114:%.*]] = insertelement <32 x float> [[TMP113]], float [[TMP60]], i32 12 -; MAX1024-NEXT: [[TMP115:%.*]] = extractelement <32 x float> [[TMP38]], i32 24 -; MAX1024-NEXT: [[TMP116:%.*]] = insertelement <32 x float> [[TMP114]], float [[TMP115]], i32 13 -; MAX1024-NEXT: [[TMP117:%.*]] = insertelement <32 x float> [[TMP116]], float [[TMP64]], i32 14 -; MAX1024-NEXT: [[TMP118:%.*]] = extractelement <32 x float> [[TMP38]], i32 28 -; MAX1024-NEXT: [[TMP119:%.*]] = insertelement <32 x float> [[TMP117]], float [[TMP118]], i32 15 -; MAX1024-NEXT: [[TMP120:%.*]] = insertelement <32 x float> [[TMP119]], float [[TMP68]], i32 16 -; MAX1024-NEXT: [[SHUFFLE2:%.*]] = shufflevector <32 x float> [[TMP120]], <32 x float> poison, <32 x i32> ; MAX1024-NEXT: br label [[BB2]] ; MAX1024: bb2: -; MAX1024-NEXT: [[TMP121:%.*]] = phi <32 x float> [ [[TMP38]], [[BB3]] ], [ 
[[SHUFFLE1]], [[BB4]] ], [ [[SHUFFLE2]], [[BB5]] ], [ [[SHUFFLE3]], [[BB1]] ] -; MAX1024-NEXT: [[TMP122:%.*]] = extractelement <32 x float> [[TMP121]], i32 30 -; MAX1024-NEXT: store float [[TMP122]], float* undef, align 4 +; MAX1024-NEXT: [[TMP48:%.*]] = phi <8 x float> [ [[TMP27]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP15]], [[BB1]] ] +; MAX1024-NEXT: [[TMP49:%.*]] = phi <8 x float> [ [[TMP37]], [[BB3]] ], [ [[TMP15]], [[BB4]] ], [ [[TMP37]], [[BB5]] ], [ [[TMP37]], [[BB1]] ] +; MAX1024-NEXT: [[TMP50:%.*]] = phi <8 x float> [ [[TMP47]], [[BB3]] ], [ [[TMP47]], [[BB4]] ], [ [[TMP15]], [[BB5]] ], [ [[TMP47]], [[BB1]] ] +; MAX1024-NEXT: [[TMP51:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP17]], [[BB5]] ], [ [[TMP15]], [[BB1]] ] +; MAX1024-NEXT: [[TMP52:%.*]] = extractelement <8 x float> [[TMP49]], i32 7 +; MAX1024-NEXT: store float [[TMP52]], float* undef, align 4 ; MAX1024-NEXT: ret void ; bb: