diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -40,60 +40,57 @@ namespace { static cl::opt WidenLoads( - "amdgpu-codegenprepare-widen-constant-loads", - cl::desc("Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"), - cl::ReallyHidden, - cl::init(false)); + "amdgpu-codegenprepare-widen-constant-loads", + cl::desc( + "Widen sub-dword constant address space loads in AMDGPUCodeGenPrepare"), + cl::ReallyHidden, cl::init(false)); static cl::opt Widen16BitOps( - "amdgpu-codegenprepare-widen-16-bit-ops", - cl::desc("Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"), - cl::ReallyHidden, - cl::init(true)); + "amdgpu-codegenprepare-widen-16-bit-ops", + cl::desc( + "Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"), + cl::ReallyHidden, cl::init(true)); static cl::opt - ScalarizeLargePHIs("amdgpu-codegenprepare-break-large-phis", - cl::desc("Break large PHI nodes for DAGISel"), - cl::ReallyHidden, cl::init(true)); + BreakLargePHIs("amdgpu-codegenprepare-break-large-phis", + cl::desc("Break large PHI nodes for DAGISel"), + cl::ReallyHidden, cl::init(true)); static cl::opt - ForceScalarizeLargePHIs("amdgpu-codegenprepare-force-break-large-phis", - cl::desc("For testing purposes, always break large " - "PHIs even if it isn't profitable."), - cl::ReallyHidden, cl::init(false)); + ForceBreakLargePHIs("amdgpu-codegenprepare-force-break-large-phis", + cl::desc("For testing purposes, always break large " + "PHIs even if it isn't profitable."), + cl::ReallyHidden, cl::init(false)); -static cl::opt ScalarizeLargePHIsThreshold( +static cl::opt BreakLargePHIsThreshold( "amdgpu-codegenprepare-break-large-phis-threshold", cl::desc("Minimum type size in bits for breaking large PHI nodes"), cl::ReallyHidden, cl::init(32)); static cl::opt UseMul24Intrin( - 
"amdgpu-codegenprepare-mul24", - cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"), - cl::ReallyHidden, - cl::init(true)); + "amdgpu-codegenprepare-mul24", + cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"), + cl::ReallyHidden, cl::init(true)); // Legalize 64-bit division by using the generic IR expansion. -static cl::opt ExpandDiv64InIR( - "amdgpu-codegenprepare-expand-div64", - cl::desc("Expand 64-bit division in AMDGPUCodeGenPrepare"), - cl::ReallyHidden, - cl::init(false)); +static cl::opt + ExpandDiv64InIR("amdgpu-codegenprepare-expand-div64", + cl::desc("Expand 64-bit division in AMDGPUCodeGenPrepare"), + cl::ReallyHidden, cl::init(false)); // Leave all division operations as they are. This supersedes ExpandDiv64InIR // and is used for testing the legalizer. static cl::opt DisableIDivExpand( - "amdgpu-codegenprepare-disable-idiv-expansion", - cl::desc("Prevent expanding integer division in AMDGPUCodeGenPrepare"), - cl::ReallyHidden, - cl::init(false)); + "amdgpu-codegenprepare-disable-idiv-expansion", + cl::desc("Prevent expanding integer division in AMDGPUCodeGenPrepare"), + cl::ReallyHidden, cl::init(false)); // Disable processing of fdiv so we can better test the backend implementations. static cl::opt DisableFDivExpand( - "amdgpu-codegenprepare-disable-fdiv-expansion", - cl::desc("Prevent expanding floating point division in AMDGPUCodeGenPrepare"), - cl::ReallyHidden, - cl::init(false)); + "amdgpu-codegenprepare-disable-fdiv-expansion", + cl::desc( + "Prevent expanding floating point division in AMDGPUCodeGenPrepare"), + cl::ReallyHidden, cl::init(false)); class AMDGPUCodeGenPrepareImpl : public InstVisitor { @@ -216,27 +213,25 @@ /// we expand some divisions here, we need to perform this before obscuring. 
bool foldBinOpIntoSelect(BinaryOperator &I) const; - bool divHasSpecialOptimization(BinaryOperator &I, - Value *Num, Value *Den) const; - int getDivNumBits(BinaryOperator &I, - Value *Num, Value *Den, - unsigned AtLeast, bool Signed) const; + bool divHasSpecialOptimization(BinaryOperator &I, Value *Num, + Value *Den) const; + int getDivNumBits(BinaryOperator &I, Value *Num, Value *Den, unsigned AtLeast, + bool Signed) const; /// Expands 24 bit div or rem. - Value* expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I, - Value *Num, Value *Den, - bool IsDiv, bool IsSigned) const; + Value *expandDivRem24(IRBuilder<> &Builder, BinaryOperator &I, Value *Num, + Value *Den, bool IsDiv, bool IsSigned) const; - Value *expandDivRem24Impl(IRBuilder<> &Builder, BinaryOperator &I, - Value *Num, Value *Den, unsigned NumBits, - bool IsDiv, bool IsSigned) const; + Value *expandDivRem24Impl(IRBuilder<> &Builder, BinaryOperator &I, Value *Num, + Value *Den, unsigned NumBits, bool IsDiv, + bool IsSigned) const; /// Expands 32 bit div or rem. - Value* expandDivRem32(IRBuilder<> &Builder, BinaryOperator &I, - Value *Num, Value *Den) const; + Value *expandDivRem32(IRBuilder<> &Builder, BinaryOperator &I, Value *Num, + Value *Den) const; - Value *shrinkDivRem64(IRBuilder<> &Builder, BinaryOperator &I, - Value *Num, Value *Den) const; + Value *shrinkDivRem64(IRBuilder<> &Builder, BinaryOperator &I, Value *Num, + Value *Den) const; void expandDivRem64(BinaryOperator &I) const; /// Widen a scalar load. 
@@ -319,6 +314,8 @@ } // end anonymous namespace bool AMDGPUCodeGenPrepareImpl::run(Function &F) { + BreakPhiNodesCache.clear(); + bool MadeChange = false; Function::iterator NextBB; @@ -364,12 +361,14 @@ bool AMDGPUCodeGenPrepareImpl::isSigned(const BinaryOperator &I) const { return I.getOpcode() == Instruction::AShr || - I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem; + I.getOpcode() == Instruction::SDiv || + I.getOpcode() == Instruction::SRem; } bool AMDGPUCodeGenPrepareImpl::isSigned(const SelectInst &I) const { - return isa(I.getOperand(0)) ? - cast(I.getOperand(0))->isSigned() : false; + return isa(I.getOperand(0)) + ? cast(I.getOperand(0))->isSigned() + : false; } bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const { @@ -440,8 +439,7 @@ if (I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::UDiv || - I.getOpcode() == Instruction::SRem || - I.getOpcode() == Instruction::URem) + I.getOpcode() == Instruction::SRem || I.getOpcode() == Instruction::URem) return false; IRBuilder<> Builder(&I); @@ -491,7 +489,7 @@ Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType()); Value *ExtOp0 = nullptr; Value *ExtOp1 = nullptr; - Value *NewICmp = nullptr; + Value *NewICmp = nullptr; if (I.isSigned()) { ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty); @@ -549,13 +547,12 @@ Type *I32Ty = getI32Ty(Builder, I.getType()); Function *I32 = - Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty }); + Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, {I32Ty}); Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty); - Value *ExtRes = Builder.CreateCall(I32, { ExtOp }); + Value *ExtRes = Builder.CreateCall(I32, {ExtOp}); Value *LShrOp = Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType())); - Value *TruncRes = - Builder.CreateTrunc(LShrOp, I.getType()); + Value *TruncRes = Builder.CreateTrunc(LShrOp, I.getType()); I.replaceAllUsesWith(TruncRes); I.eraseFromParent(); @@ -583,8 +580,7 
@@ Values.push_back(Builder.CreateExtractElement(V, I)); } -static Value *insertValues(IRBuilder<> &Builder, - Type *Ty, +static Value *insertValues(IRBuilder<> &Builder, Type *Ty, SmallVectorImpl &Values) { if (!Ty->isVectorTy()) { assert(Values.size() == 1); @@ -743,15 +739,15 @@ // TODO: Handle special 0/-1 cases DAG combine does, although we only really // need to handle divisions here. - Constant *FoldedT = SelOpNo ? - ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) : - ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL); + Constant *FoldedT = + SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CT, *DL) + : ConstantFoldBinaryOpOperands(BO.getOpcode(), CT, CBO, *DL); if (!FoldedT || isa(FoldedT)) return false; - Constant *FoldedF = SelOpNo ? - ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) : - ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL); + Constant *FoldedF = + SelOpNo ? ConstantFoldBinaryOpOperands(BO.getOpcode(), CBO, CF, *DL) + : ConstantFoldBinaryOpOperands(BO.getOpcode(), CF, CBO, *DL); if (!FoldedF || isa(FoldedF)) return false; @@ -760,8 +756,8 @@ if (const FPMathOperator *FPOp = dyn_cast(&BO)) Builder.setFastMathFlags(FPOp->getFastMathFlags()); - Value *NewSelect = Builder.CreateSelect(Sel->getCondition(), - FoldedT, FoldedF); + Value *NewSelect = + Builder.CreateSelect(Sel->getCondition(), FoldedT, FoldedF); NewSelect->takeName(&BO); BO.replaceAllUsesWith(NewSelect); BO.eraseFromParent(); @@ -1160,8 +1156,8 @@ return Attr.getValueAsBool(); } -static std::pair getMul64(IRBuilder<> &Builder, - Value *LHS, Value *RHS) { +static std::pair getMul64(IRBuilder<> &Builder, Value *LHS, + Value *RHS) { Type *I32Ty = Builder.getInt32Ty(); Type *I64Ty = Builder.getInt64Ty(); @@ -1174,7 +1170,7 @@ return std::pair(Lo, Hi); } -static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) { +static Value *getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) { return getMul64(Builder, LHS, RHS).second; } 
@@ -1245,12 +1241,12 @@ : Builder.CreateUIToFP(IA, F32Ty); // float fb = (float)ib; - Value *FB = IsSigned ? Builder.CreateSIToFP(IB,F32Ty) - : Builder.CreateUIToFP(IB,F32Ty); + Value *FB = IsSigned ? Builder.CreateSIToFP(IB, F32Ty) + : Builder.CreateUIToFP(IB, F32Ty); Function *RcpDecl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_rcp, Builder.getFloatTy()); - Value *RCP = Builder.CreateCall(RcpDecl, { FB }); + Value *RCP = Builder.CreateCall(RcpDecl, {FB}); Value *FQM = Builder.CreateFMul(FA, RCP); // fq = trunc(fqm); @@ -1264,8 +1260,8 @@ auto FMAD = !ST->hasMadMacF32Insts() ? Intrinsic::fma : (Intrinsic::ID)Intrinsic::amdgcn_fmad_ftz; - Value *FR = Builder.CreateIntrinsic(FMAD, - {FQNeg->getType()}, {FQNeg, FB, FA}, FQ); + Value *FR = + Builder.CreateIntrinsic(FMAD, {FQNeg->getType()}, {FQNeg, FB, FA}, FQ); // int iq = (int)fq; Value *IQ = IsSigned ? Builder.CreateFPToSI(FQ, I32Ty) @@ -1301,8 +1297,7 @@ Res = Builder.CreateShl(Res, InRegBits); Res = Builder.CreateAShr(Res, InRegBits); } else { - ConstantInt *TruncMask - = Builder.getInt32((UINT64_C(1) << DivBits) - 1); + ConstantInt *TruncMask = Builder.getInt32((UINT64_C(1) << DivBits) - 1); Res = Builder.CreateAnd(Res, TruncMask); } } @@ -1338,8 +1333,8 @@ // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (BinOpDen->getOpcode() == Instruction::Shl && isa(BinOpDen->getOperand(0)) && - isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), *DL, true, - 0, AC, &I, DT)) { + isKnownToBeAPowerOfTwo(BinOpDen->getOperand(0), *DL, true, 0, AC, &I, + DT)) { return true; } } @@ -1369,7 +1364,7 @@ Builder.setFastMathFlags(FMF); if (divHasSpecialOptimization(I, X, Y)) - return nullptr; // Keep it for later optimization. + return nullptr; // Keep it for later optimization. 
bool IsDiv = Opc == Instruction::UDiv || Opc == Instruction::SDiv; bool IsSigned = Opc == Instruction::SRem || Opc == Instruction::SDiv; @@ -1389,8 +1384,8 @@ } if (Value *Res = expandDivRem24(Builder, I, X, Y, IsDiv, IsSigned)) { - return IsSigned ? Builder.CreateSExtOrTrunc(Res, Ty) : - Builder.CreateZExtOrTrunc(Res, Ty); + return IsSigned ? Builder.CreateSExtOrTrunc(Res, Ty) + : Builder.CreateZExtOrTrunc(Res, Ty); } ConstantInt *Zero = Builder.getInt32(0); @@ -1486,7 +1481,7 @@ BinaryOperator &I, Value *Num, Value *Den) const { if (!ExpandDiv64InIR && divHasSpecialOptimization(I, Num, Den)) - return nullptr; // Keep it for later optimization. + return nullptr; // Keep it for later optimization. Instruction::BinaryOps Opc = I.getOpcode(); @@ -1499,15 +1494,15 @@ Value *Narrowed = nullptr; if (NumDivBits <= 24) { - Narrowed = expandDivRem24Impl(Builder, I, Num, Den, NumDivBits, - IsDiv, IsSigned); + Narrowed = + expandDivRem24Impl(Builder, I, Num, Den, NumDivBits, IsDiv, IsSigned); } else if (NumDivBits <= 32) { Narrowed = expandDivRem32(Builder, I, Num, Den); } if (Narrowed) { - return IsSigned ? Builder.CreateSExt(Narrowed, Num->getType()) : - Builder.CreateZExt(Narrowed, Num->getType()); + return IsSigned ? Builder.CreateSExt(Narrowed, Num->getType()) + : Builder.CreateZExt(Narrowed, Num->getType()); } return nullptr; @@ -1550,8 +1545,7 @@ if ((Opc == Instruction::URem || Opc == Instruction::UDiv || Opc == Instruction::SRem || Opc == Instruction::SDiv) && - ScalarSize <= 64 && - !DisableIDivExpand) { + ScalarSize <= 64 && !DisableIDivExpand) { Value *Num = I.getOperand(0); Value *Den = I.getOperand(1); IRBuilder<> Builder(&I); @@ -1630,17 +1624,16 @@ // If we have range metadata, we need to convert the type, and not make // assumptions about the high bits. 
if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) { - ConstantInt *Lower = - mdconst::extract(Range->getOperand(0)); + ConstantInt *Lower = mdconst::extract(Range->getOperand(0)); if (Lower->isNullValue()) { WidenLoad->setMetadata(LLVMContext::MD_range, nullptr); } else { Metadata *LowAndHigh[] = { - ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))), - // Don't make assumptions about the high bits. - ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0)) - }; + ConstantAsMetadata::get( + ConstantInt::get(I32Ty, Lower->getValue().zext(32))), + // Don't make assumptions about the high bits. + ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))}; WidenLoad->setMetadata(LLVMContext::MD_range, MDNode::get(Mod->getContext(), LowAndHigh)); @@ -1777,47 +1770,79 @@ return false; } +static void collectPHINodes(const PHINode &I, + DenseSet &SeenPHIs) { + const auto [It, Inserted] = SeenPHIs.insert(&I); + if (!Inserted) + return; + + for (const Value *Inc : I.incoming_values()) { + if (const auto *PhiInc = dyn_cast(Inc)) + collectPHINodes(*PhiInc, SeenPHIs); + } + + for (const User *U : I.users()) { + if (const auto *PhiU = dyn_cast(U)) + collectPHINodes(*PhiU, SeenPHIs); + } +} + bool AMDGPUCodeGenPrepareImpl::canBreakPHINode(const PHINode &I) { - // Check in the cache, or add an entry for this node. - // - // We init with false because we consider all PHI nodes unbreakable until we - // reach a conclusion. Doing the opposite - assuming they're break-able until - // proven otherwise - can be harmful in some pathological cases so we're - // conservative for now. - const auto [It, DidInsert] = BreakPhiNodesCache.insert({&I, false}); - if (!DidInsert) + // Check in the cache first. + if (const auto It = BreakPhiNodesCache.find(&I); + It != BreakPhiNodesCache.end()) return It->second; - // This function may recurse, so to guard against infinite looping, this PHI - // is conservatively considered unbreakable until we reach a conclusion. 
+ // We consider PHI nodes as part of "chains", so given a PHI node I, we + // recursively consider all its users and incoming values that are also PHI + // nodes. We then make a decision about all of those PHIs at once. Either they + // all get broken up, or none of them do. That way, we avoid cases where a + // single PHI is/is not broken and we end up reforming/exploding a vector + // multiple times, or even worse, doing it in a loop. + DenseSet WorkList; + collectPHINodes(I, WorkList); + +#ifndef NDEBUG + // Check that none of the PHI nodes in the worklist are in the map. If some of + // them are, it means we're not good enough at collecting related PHIs. + for (const PHINode *WLP : WorkList) { + assert(BreakPhiNodesCache.count(WLP) == 0); + } +#endif - // Don't break PHIs that have no interesting incoming values. That is, where - // there is no clear opportunity to fold the "extractelement" instructions we - // would add. + // To consider a PHI profitable to break, we need to see some interesting + // incoming values. At least 2/3rd (rounded up) of all PHIs in the worklist + // must have one to consider all PHIs breakable. // - // Note: IC does not run after this pass, so we're only interested in the - // foldings that the DAG combiner can do. - if (none_of(I.incoming_values(), - [&](Value *V) { return isInterestingPHIIncomingValue(V); })) - return false; - - // Now, check users for unbreakable PHI nodes. If we have an unbreakable PHI - // node as user, we don't want to break this PHI either because it's unlikely - // to be beneficial. We would just explode the vector and reassemble it - // directly, wasting instructions. + // This threshold has been determined through performance testing. + // + // Note that the computation below is equivalent to + // + // (unsigned)ceil((K / 3.0) * 2) // - // In the case where multiple users are PHI nodes, we want at least half of - // them to be breakable. 
- int Score = 0; - for (const Value *U : I.users()) { - if (const auto *PU = dyn_cast(U)) - Score += canBreakPHINode(*PU) ? 1 : -1; + // It's simply written this way to avoid mixing integral/FP arithmetic. + const auto Threshold = (alignTo(WorkList.size() * 2, 3) / 3); + unsigned NumBreakablePHIs = 0; + bool CanBreak = false; + for (const PHINode *Cur : WorkList) { + // Don't break PHIs that have no interesting incoming values. That is, where + // there is no clear opportunity to fold the "extractelement" instructions + // we would add. + // + // Note: IC does not run after this pass, so we're only interested in the + // foldings that the DAG combiner can do. + if (any_of(Cur->incoming_values(), isInterestingPHIIncomingValue)) { + if (++NumBreakablePHIs >= Threshold) { + CanBreak = true; + break; + } + } } - if (Score < 0) - return false; + for (const PHINode *Cur : WorkList) + BreakPhiNodesCache[Cur] = CanBreak; - return BreakPhiNodesCache[&I] = true; + return CanBreak; } /// Helper class for "break large PHIs" (visitPHINode). @@ -1898,14 +1923,15 @@ // operations with most elements being "undef". This inhibits a lot of // optimization opportunities and can result in unreasonably high register // pressure and the inevitable stack spilling. - if (!ScalarizeLargePHIs || getCGPassBuilderOption().EnableGlobalISelOption) + if (!BreakLargePHIs || getCGPassBuilderOption().EnableGlobalISelOption) return false; FixedVectorType *FVT = dyn_cast(I.getType()); - if (!FVT || DL->getTypeSizeInBits(FVT) <= ScalarizeLargePHIsThreshold) + if (!FVT || FVT->getNumElements() == 1 || + DL->getTypeSizeInBits(FVT) <= BreakLargePHIsThreshold) return false; - if (!ForceScalarizeLargePHIs && !canBreakPHINode(I)) + if (!ForceBreakLargePHIs && !canBreakPHINode(I)) return false; std::vector Slices; @@ -1930,8 +1956,7 @@ Slices.emplace_back(EltTy, Idx, 1); } - if (Slices.size() == 1) - return false; + assert(Slices.size() > 1); // Create one PHI per vector piece. 
The "VectorSlice" class takes care of // creating the necessary instruction to extract the relevant slices of each diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll @@ -707,6 +707,11 @@ ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 ; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[IN]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[IN]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[IN]], i64 4 ; CHECK-NEXT: br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]] ; CHECK: then1: ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 0 @@ -714,21 +719,35 @@ ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE43:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 2 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE64:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3 ; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE85:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE011:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE212:%.*]] = extractelement <5 x double> 
[[LARGEPHI_INSERTSLICE4]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE413:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE614:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE815:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[ENDVAL:%.*]] = phi <5 x double> [ [[LARGEPHI_INSERTSLICE4]], [[THEN1]] ], [ [[IN]], [[FINALLY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE01]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE22]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE43]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE64]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE85]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE01]], [[THEN1]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[FINALLY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE22]], [[THEN1]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[FINALLY]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE43]], [[THEN1]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[FINALLY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE64]], [[THEN1]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[FINALLY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE85]], [[THEN1]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[FINALLY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE011]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE212]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: 
[[TMP12:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE413]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE614]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE815]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE016:%.*]] = insertelement <5 x double> poison, double [[TMP10]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE117:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE016]], double [[TMP11]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE218:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE117]], double [[TMP12]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE319:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE218]], double [[TMP13]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE420:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE319]], double [[TMP14]], i64 4 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE06:%.*]] = insertelement <5 x double> poison, double [[TMP5]], i64 0 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE17:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE06]], double [[TMP6]], i64 1 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE28:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE17]], double [[TMP7]], i64 2 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE39:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE28]], double [[TMP8]], i64 3 ; CHECK-NEXT: [[LARGEPHI_INSERTSLICE410:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE39]], double [[TMP9]], i64 4 -; CHECK-NEXT: store <5 x double> [[ENDVAL]], ptr [[OUT]], align 1 ; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE410]], ptr [[OUT]], align 1 +; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE420]], ptr [[OUT]], align 1 ; CHECK-NEXT: ret void ; entry: @@ -784,3 +803,468 @@ end: ret void } + + +define amdgpu_kernel void @test_breakable_chain_2_out_of_4(<5 x double> %in, ptr %out, i1 %cond) { +; CHECK-LABEL: 
@test_breakable_chain_2_out_of_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[COND0_TRUE:%.*]], label [[COND0_END:%.*]] +; CHECK: cond0.true: +; CHECK-NEXT: br label [[COND0_END]] +; CHECK: cond0.end: +; CHECK-NEXT: [[VAL_0:%.*]] = phi <5 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[COND0_TRUE]] ] +; CHECK-NEXT: br i1 [[COND]], label [[COND1_TRUE:%.*]], label [[COND1_END:%.*]] +; CHECK: cond1.true: +; CHECK-NEXT: br label [[COND1_END]] +; CHECK: cond1.end: +; CHECK-NEXT: [[VAL_1:%.*]] = phi <5 x double> [ [[VAL_0]], [[COND0_END]] ], [ zeroinitializer, [[COND1_TRUE]] ] +; CHECK-NEXT: br i1 [[COND]], label [[COND2_TRUE:%.*]], label [[COND2_END:%.*]] +; CHECK: cond2.true: +; CHECK-NEXT: br i1 [[COND]], label [[COND2_END]], label [[END:%.*]] +; CHECK: cond2.end: +; CHECK-NEXT: [[VAL_2:%.*]] = phi <5 x double> [ [[VAL_1]], [[COND1_END]] ], [ [[IN:%.*]], [[COND2_TRUE]] ] +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[ENDVAL:%.*]] = phi <5 x double> [ [[VAL_2]], [[COND2_END]] ], [ [[IN]], [[COND2_TRUE]] ] +; CHECK-NEXT: store <5 x double> [[ENDVAL]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %cond0.true, label %cond0.end + +cond0.true: + br label %cond0.end + +cond0.end: + %val.0 = phi <5 x double> [ zeroinitializer, %entry ], [ zeroinitializer, %cond0.true ] + br i1 %cond, label %cond1.true, label %cond1.end + +cond1.true: + br label %cond1.end + +cond1.end: + %val.1 = phi <5 x double> [ %val.0, %cond0.end ], [ zeroinitializer, %cond1.true ] + br i1 %cond, label %cond2.true, label %cond2.end + +cond2.true: + br i1 %cond, label %cond2.end, label %end + +cond2.end: + %val.2 = phi <5 x double> [ %val.1, %cond1.end ], [ %in, %cond2.true ] + br label %end + +end: + %endval = phi <5 x double> [ %val.2, %cond2.end ], [ %in, %cond2.true ] + store <5 x double> %endval, ptr %out, align 1 + ret void +} + +define amdgpu_kernel void @test_breakable_chain_3_out_of_4(<5 x double> %in, ptr 
%out, i1 %cond) { +; CHECK-LABEL: @test_breakable_chain_3_out_of_4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[COND0_TRUE:%.*]], label [[COND0_END:%.*]] +; CHECK: cond0.true: +; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[IN]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[IN]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[IN]], i64 4 +; CHECK-NEXT: br label [[COND0_END]] +; CHECK: cond0.end: +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[COND0_TRUE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[COND0_TRUE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[COND0_TRUE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[COND0_TRUE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[COND0_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; 
CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4 +; CHECK-NEXT: br i1 [[COND]], label [[COND1_TRUE:%.*]], label [[COND1_END:%.*]] +; CHECK: cond1.true: +; CHECK-NEXT: br label [[COND1_END]] +; CHECK: cond1.end: +; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[COND0_END]] ], [ 0.000000e+00, [[COND1_TRUE]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[COND0_END]] ], [ 0.000000e+00, [[COND1_TRUE]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[COND0_END]] ], [ 0.000000e+00, [[COND1_TRUE]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[COND0_END]] ], [ 0.000000e+00, [[COND1_TRUE]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[COND0_END]] ], [ 0.000000e+00, [[COND1_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE01:%.*]] = insertelement <5 x double> poison, double [[TMP5]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE12:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE01]], double [[TMP6]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE23:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE12]], double [[TMP7]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE34:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE23]], double [[TMP8]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE45:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE34]], double [[TMP9]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE06:%.*]] = extractelement <5 x double> 
[[LARGEPHI_INSERTSLICE45]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE27:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE45]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE48:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE45]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE69:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE45]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE810:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE45]], i64 4 +; CHECK-NEXT: br i1 [[COND]], label [[COND2_TRUE:%.*]], label [[COND2_END:%.*]] +; CHECK: cond2.true: +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE117:%.*]] = extractelement <5 x double> [[IN]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE319:%.*]] = extractelement <5 x double> [[IN]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE521:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE723:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE925:%.*]] = extractelement <5 x double> [[IN]], i64 4 +; CHECK-NEXT: br i1 [[COND]], label [[COND2_END]], label [[END:%.*]] +; CHECK: cond2.end: +; CHECK-NEXT: [[TMP10:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE06]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE27]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE48]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE69]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP14:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE810]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE011:%.*]] = insertelement <5 x double> poison, double [[TMP10]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE112:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE011]], double [[TMP11]], i64 1 +; 
CHECK-NEXT: [[LARGEPHI_INSERTSLICE213:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE112]], double [[TMP12]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE314:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE213]], double [[TMP13]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE415:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE314]], double [[TMP14]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE016:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE415]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE218:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE415]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE420:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE415]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE622:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE415]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE824:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE415]], i64 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[TMP15:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE016]], [[COND2_END]] ], [ [[LARGEPHI_EXTRACTSLICE117]], [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE218]], [[COND2_END]] ], [ [[LARGEPHI_EXTRACTSLICE319]], [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE420]], [[COND2_END]] ], [ [[LARGEPHI_EXTRACTSLICE521]], [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE622]], [[COND2_END]] ], [ [[LARGEPHI_EXTRACTSLICE723]], [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE824]], [[COND2_END]] ], [ [[LARGEPHI_EXTRACTSLICE925]], [[COND2_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE026:%.*]] = insertelement <5 x double> poison, double [[TMP15]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE127:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE026]], double [[TMP16]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE228:%.*]] = insertelement <5 
x double> [[LARGEPHI_INSERTSLICE127]], double [[TMP17]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE329:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE228]], double [[TMP18]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE430:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE329]], double [[TMP19]], i64 4 +; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE430]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %cond0.true, label %cond0.end + +cond0.true: + %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3 + br label %cond0.end + +cond0.end: + %val.0 = phi <5 x double> [ zeroinitializer, %entry ], [ %in, %cond0.true ] + br i1 %cond, label %cond1.true, label %cond1.end + +cond1.true: + br label %cond1.end + +cond1.end: + %val.1 = phi <5 x double> [ %val.0, %cond0.end ], [ zeroinitializer, %cond1.true ] + br i1 %cond, label %cond2.true, label %cond2.end + +cond2.true: + br i1 %cond, label %cond2.end, label %end + +cond2.end: + %val.2 = phi <5 x double> [ %val.1, %cond1.end ], [ zeroinitializer, %cond2.true ] + br label %end + +end: + %endval = phi <5 x double> [ %val.2, %cond2.end ], [ %in, %cond2.true ] + store <5 x double> %endval, ptr %out, align 1 + ret void +} + +; 7 PHIs, 4 breakable: the CHECK lines expect all seven <5 x double> PHIs to be left intact (none is broken up). +define amdgpu_kernel void @test_breakable_chain_4_out_of_7(<5 x double> %in, ptr %out, i1 %cond) { +; CHECK-LABEL: @test_breakable_chain_4_out_of_7( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[COND0_TRUE:%.*]], label [[COND0_END:%.*]] +; CHECK: cond0.true: +; CHECK-NEXT: br label [[COND0_END]] +; CHECK: cond0.end: +; CHECK-NEXT: [[VAL_0:%.*]] = phi <5 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[IN:%.*]], [[COND0_TRUE]] ] +; CHECK-NEXT: br i1 [[COND]], label [[COND1_TRUE:%.*]], label [[COND1_END:%.*]] +; CHECK: cond1.true: +; CHECK-NEXT: br label [[COND1_END]] +; CHECK: cond1.end: +; CHECK-NEXT: [[VAL_1:%.*]] = phi <5 x double> [ [[VAL_0]], [[COND0_END]] ], [ zeroinitializer, [[COND1_TRUE]] ]
+; CHECK-NEXT: br i1 [[COND]], label [[COND2_TRUE:%.*]], label [[COND2_END:%.*]] +; CHECK: cond2.true: +; CHECK-NEXT: br label [[COND2_END]] +; CHECK: cond2.end: +; CHECK-NEXT: [[VAL_2:%.*]] = phi <5 x double> [ [[VAL_1]], [[COND1_END]] ], [ zeroinitializer, [[COND2_TRUE]] ] +; CHECK-NEXT: br i1 [[COND]], label [[COND3_TRUE:%.*]], label [[COND3_END:%.*]] +; CHECK: cond3.true: +; CHECK-NEXT: br label [[COND3_END]] +; CHECK: cond3.end: +; CHECK-NEXT: [[VAL_3:%.*]] = phi <5 x double> [ [[VAL_2]], [[COND2_END]] ], [ zeroinitializer, [[COND3_TRUE]] ] +; CHECK-NEXT: br i1 [[COND]], label [[COND4_TRUE:%.*]], label [[COND4_END:%.*]] +; CHECK: cond4.true: +; CHECK-NEXT: [[B:%.*]] = insertelement <5 x double> [[VAL_0]], double 7.140000e+00, i64 4 +; CHECK-NEXT: br label [[COND4_END]] +; CHECK: cond4.end: +; CHECK-NEXT: [[VAL_4:%.*]] = phi <5 x double> [ [[VAL_3]], [[COND3_END]] ], [ [[IN]], [[COND4_TRUE]] ] +; CHECK-NEXT: br i1 [[COND]], label [[COND5_TRUE:%.*]], label [[COND5_END:%.*]] +; CHECK: cond5.true: +; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[COND5_END]] +; CHECK: cond5.end: +; CHECK-NEXT: [[VAL_5:%.*]] = phi <5 x double> [ [[VAL_4]], [[COND4_END]] ], [ [[IN]], [[COND5_TRUE]] ] +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[ENDVAL:%.*]] = phi <5 x double> [ [[VAL_5]], [[COND5_END]] ], [ [[IN]], [[COND5_TRUE]] ] +; CHECK-NEXT: store <5 x double> [[ENDVAL]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %cond0.true, label %cond0.end + +cond0.true: + br label %cond0.end + +cond0.end: + %val.0 = phi <5 x double> [ zeroinitializer, %entry ], [ %in, %cond0.true ] + br i1 %cond, label %cond1.true, label %cond1.end + +cond1.true: + br label %cond1.end + +cond1.end: + %val.1 = phi <5 x double> [ %val.0, %cond0.end ], [ zeroinitializer, %cond1.true ] + br i1 %cond, label %cond2.true, label %cond2.end + +cond2.true: + br label %cond2.end + +cond2.end: + %val.2 = phi <5 x double> [ %val.1, %cond1.end ], [ 
zeroinitializer, %cond2.true ] + br i1 %cond, label %cond3.true, label %cond3.end + +cond3.true: + br label %cond3.end + +cond3.end: + %val.3 = phi <5 x double> [ %val.2, %cond2.end ], [ zeroinitializer, %cond3.true ] + br i1 %cond, label %cond4.true, label %cond4.end + +cond4.true: + %b = insertelement <5 x double> %val.0, double 7.140000e+00, i64 4 + br label %cond4.end + +cond4.end: + %val.4 = phi <5 x double> [ %val.3, %cond3.end ], [ %in, %cond4.true ] + br i1 %cond, label %cond5.true, label %cond5.end + +cond5.true: + br i1 %cond, label %end, label %cond5.end + +cond5.end: + %val.5 = phi <5 x double> [ %val.4, %cond4.end ], [ %in, %cond5.true ] + br label %end + +end: + %endval = phi <5 x double> [ %val.5, %cond5.end ], [ %in, %cond5.true ] + store <5 x double> %endval, ptr %out, align 1 + ret void +} + +; 7 PHIs, 5 breakable (going by the test name): the CHECK lines expect every PHI in the chain to be split into per-element double PHIs. +define amdgpu_kernel void @test_breakable_chain_5_out_of_7(<5 x double> %in, ptr %out, i1 %cond) { +; CHECK-LABEL: @test_breakable_chain_5_out_of_7( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[IN:%.*]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[IN]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[IN]], i64 4 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[COND0_TRUE:%.*]], label [[COND0_END:%.*]] +; CHECK: cond0.true: +; CHECK-NEXT: br label [[COND0_END]] +; CHECK: cond0.end: +; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[ENTRY:%.*]] ], [ 0.000000e+00, [[COND0_TRUE]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[ENTRY]] ], [ 0.000000e+00, [[COND0_TRUE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[ENTRY]] ], [ 0.000000e+00, [[COND0_TRUE]] ] +; CHECK-NEXT: [[TMP3:%.*]] =
phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[ENTRY]] ], [ 0.000000e+00, [[COND0_TRUE]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[ENTRY]] ], [ 0.000000e+00, [[COND0_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE22:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE43:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE64:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE85:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4 +; CHECK-NEXT: br i1 [[COND]], label [[COND1_TRUE:%.*]], label [[COND1_END:%.*]] +; CHECK: cond1.true: +; CHECK-NEXT: br label [[COND1_END]] +; CHECK: cond1.end: +; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE01]], [[COND0_END]] ], [ 0.000000e+00, [[COND1_TRUE]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE22]], [[COND0_END]] ], [ 0.000000e+00, [[COND1_TRUE]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE43]], [[COND0_END]] ], [ 0.000000e+00, [[COND1_TRUE]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE64]], [[COND0_END]] ], [ 0.000000e+00, 
[[COND1_TRUE]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE85]], [[COND0_END]] ], [ 0.000000e+00, [[COND1_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE06:%.*]] = insertelement <5 x double> poison, double [[TMP5]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE17:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE06]], double [[TMP6]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE28:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE17]], double [[TMP7]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE39:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE28]], double [[TMP8]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE410:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE39]], double [[TMP9]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE011:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE410]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE212:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE410]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE413:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE410]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE614:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE410]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE815:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE410]], i64 4 +; CHECK-NEXT: br i1 [[COND]], label [[COND2_TRUE:%.*]], label [[COND2_END:%.*]] +; CHECK: cond2.true: +; CHECK-NEXT: br label [[COND2_END]] +; CHECK: cond2.end: +; CHECK-NEXT: [[TMP10:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE011]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE212]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE413]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE614]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: 
[[TMP14:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE815]], [[COND1_END]] ], [ 0.000000e+00, [[COND2_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE016:%.*]] = insertelement <5 x double> poison, double [[TMP10]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE117:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE016]], double [[TMP11]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE218:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE117]], double [[TMP12]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE319:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE218]], double [[TMP13]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE420:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE319]], double [[TMP14]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE021:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE420]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE222:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE420]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE423:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE420]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE624:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE420]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE825:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE420]], i64 4 +; CHECK-NEXT: br i1 [[COND]], label [[COND3_TRUE:%.*]], label [[COND3_END:%.*]] +; CHECK: cond3.true: +; CHECK-NEXT: [[A:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE4]], double 7.140000e+00, i64 4 +; CHECK-NEXT: br label [[COND3_END]] +; CHECK: cond3.end: +; CHECK-NEXT: [[TMP15:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE021]], [[COND2_END]] ], [ 0.000000e+00, [[COND3_TRUE]] ] +; CHECK-NEXT: [[TMP16:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE222]], [[COND2_END]] ], [ 0.000000e+00, [[COND3_TRUE]] ] +; CHECK-NEXT: [[TMP17:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE423]], [[COND2_END]] ], [ 0.000000e+00, [[COND3_TRUE]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi double [ 
[[LARGEPHI_EXTRACTSLICE624]], [[COND2_END]] ], [ 0.000000e+00, [[COND3_TRUE]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE825]], [[COND2_END]] ], [ 0.000000e+00, [[COND3_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE026:%.*]] = insertelement <5 x double> poison, double [[TMP15]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE127:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE026]], double [[TMP16]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE228:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE127]], double [[TMP17]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE329:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE228]], double [[TMP18]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE430:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE329]], double [[TMP19]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE031:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE430]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE232:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE430]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE433:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE430]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE634:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE430]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE835:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE430]], i64 4 +; CHECK-NEXT: br i1 [[COND]], label [[COND4_TRUE:%.*]], label [[COND4_END:%.*]] +; CHECK: cond4.true: +; CHECK-NEXT: br label [[COND4_END]] +; CHECK: cond4.end: +; CHECK-NEXT: [[TMP20:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE031]], [[COND3_END]] ], [ 0.000000e+00, [[COND4_TRUE]] ] +; CHECK-NEXT: [[TMP21:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE232]], [[COND3_END]] ], [ 0.000000e+00, [[COND4_TRUE]] ] +; CHECK-NEXT: [[TMP22:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE433]], [[COND3_END]] ], [ 0.000000e+00, [[COND4_TRUE]] ] +; CHECK-NEXT: [[TMP23:%.*]] = phi double [ 
[[LARGEPHI_EXTRACTSLICE634]], [[COND3_END]] ], [ 0.000000e+00, [[COND4_TRUE]] ] +; CHECK-NEXT: [[TMP24:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE835]], [[COND3_END]] ], [ 0.000000e+00, [[COND4_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE036:%.*]] = insertelement <5 x double> poison, double [[TMP20]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE137:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE036]], double [[TMP21]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE238:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE137]], double [[TMP22]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE339:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE238]], double [[TMP23]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE440:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE339]], double [[TMP24]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE041:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE440]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE242:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE440]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE443:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE440]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE644:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE440]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE845:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE440]], i64 4 +; CHECK-NEXT: br i1 [[COND]], label [[COND5_TRUE:%.*]], label [[COND5_END:%.*]] +; CHECK: cond5.true: +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE1:%.*]] = extractelement <5 x double> [[IN]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE3:%.*]] = extractelement <5 x double> [[IN]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE5:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE7:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE9:%.*]] = extractelement <5 x double> [[IN]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE152:%.*]] = 
extractelement <5 x double> [[IN]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE354:%.*]] = extractelement <5 x double> [[IN]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE556:%.*]] = extractelement <5 x double> [[IN]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE758:%.*]] = extractelement <5 x double> [[IN]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE960:%.*]] = extractelement <5 x double> [[IN]], i64 4 +; CHECK-NEXT: br i1 [[COND]], label [[END:%.*]], label [[COND5_END]] +; CHECK: cond5.end: +; CHECK-NEXT: [[TMP25:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE041]], [[COND4_END]] ], [ [[LARGEPHI_EXTRACTSLICE1]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[TMP26:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE242]], [[COND4_END]] ], [ [[LARGEPHI_EXTRACTSLICE3]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[TMP27:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE443]], [[COND4_END]] ], [ [[LARGEPHI_EXTRACTSLICE5]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[TMP28:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE644]], [[COND4_END]] ], [ [[LARGEPHI_EXTRACTSLICE7]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[TMP29:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE845]], [[COND4_END]] ], [ [[LARGEPHI_EXTRACTSLICE9]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE046:%.*]] = insertelement <5 x double> poison, double [[TMP25]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE147:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE046]], double [[TMP26]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE248:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE147]], double [[TMP27]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE349:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE248]], double [[TMP28]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE450:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE349]], double [[TMP29]], i64 4 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE051:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE450]], i64 0 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE253:%.*]] = extractelement <5 x 
double> [[LARGEPHI_INSERTSLICE450]], i64 1 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE455:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE450]], i64 2 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE657:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE450]], i64 3 +; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE859:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE450]], i64 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[TMP30:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE051]], [[COND5_END]] ], [ [[LARGEPHI_EXTRACTSLICE152]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[TMP31:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE253]], [[COND5_END]] ], [ [[LARGEPHI_EXTRACTSLICE354]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[TMP32:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE455]], [[COND5_END]] ], [ [[LARGEPHI_EXTRACTSLICE556]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[TMP33:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE657]], [[COND5_END]] ], [ [[LARGEPHI_EXTRACTSLICE758]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[TMP34:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE859]], [[COND5_END]] ], [ [[LARGEPHI_EXTRACTSLICE960]], [[COND5_TRUE]] ] +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE061:%.*]] = insertelement <5 x double> poison, double [[TMP30]], i64 0 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE162:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE061]], double [[TMP31]], i64 1 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE263:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE162]], double [[TMP32]], i64 2 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE364:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE263]], double [[TMP33]], i64 3 +; CHECK-NEXT: [[LARGEPHI_INSERTSLICE465:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE364]], double [[TMP34]], i64 4 +; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE465]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %cond0.true, label %cond0.end + +cond0.true: + br label %cond0.end + +cond0.end: + %val.0 = phi <5 x 
double> [ %in, %entry ], [ zeroinitializer, %cond0.true ] + br i1 %cond, label %cond1.true, label %cond1.end + +cond1.true: + br label %cond1.end + +cond1.end: + %val.1 = phi <5 x double> [ %val.0, %cond0.end ], [ zeroinitializer, %cond1.true ] + br i1 %cond, label %cond2.true, label %cond2.end + +cond2.true: + br label %cond2.end + +cond2.end: + %val.2 = phi <5 x double> [ %val.1, %cond1.end ], [ zeroinitializer, %cond2.true ] + br i1 %cond, label %cond3.true, label %cond3.end + +cond3.true: + %a = insertelement <5 x double> %val.0, double 7.140000e+00, i64 4 + br label %cond3.end + +cond3.end: + %val.3 = phi <5 x double> [ %val.2, %cond2.end ], [ zeroinitializer, %cond3.true ] + br i1 %cond, label %cond4.true, label %cond4.end + +cond4.true: + br label %cond4.end + +cond4.end: + %val.4 = phi <5 x double> [ %val.3, %cond3.end ], [ zeroinitializer, %cond4.true ] + br i1 %cond, label %cond5.true, label %cond5.end + +cond5.true: + br i1 %cond, label %end, label %cond5.end + +cond5.end: + %val.5 = phi <5 x double> [ %val.4, %cond4.end ], [ %in, %cond5.true ] + br label %end + +end: + %endval = phi <5 x double> [ %val.5, %cond5.end ], [ %in, %cond5.true ] + store <5 x double> %endval, ptr %out, align 1 + ret void +}