diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7075,6 +7075,66 @@ } // end anonymous namespace +static unsigned int getAggregateSize(Instruction *InsertInst, + unsigned int &AggregateSize) { + auto *IE = dyn_cast(InsertInst); + if (IE) { + AggregateSize = IE->getType()->getNumElements(); + return true; + } else { + auto *IV = cast(InsertInst); + AggregateSize = 1; + Type *CurrentType = IV->getType(); + do { + if (auto *ST = dyn_cast(CurrentType)) { + for (auto Elt : ST->elements()) + if (Elt != ST->getElementType(0)) // check homogeneity + return false; + AggregateSize *= ST->getNumElements(); + CurrentType = ST->getElementType(0); + } else if (auto *AT = dyn_cast(CurrentType)) { + AggregateSize *= AT->getNumElements(); + CurrentType = AT->getElementType(); + } else if (auto *VT = dyn_cast(CurrentType)) { + AggregateSize *= VT->getNumElements(); + return true; + } else if (CurrentType->isSingleValueType()) + return true; + else + return false; + } while (true); + } +} + +static bool getOperandIndex(Instruction *InsertInst, + unsigned int &OperandIndex) { + auto *IE = dyn_cast(InsertInst); + if (IE) { + if (auto *CI = dyn_cast(IE->getOperand(2))) { + auto *VT = dyn_cast(IE->getType()); + OperandIndex *= VT->getNumElements(); + OperandIndex += CI->getZExtValue(); + return true; + } else + return false; + } else { + auto *IV = cast(InsertInst); + Type *CurrentType = IV->getType(); + for (unsigned int Index : IV->indices()) { + if (auto *ST = dyn_cast(CurrentType)) { + OperandIndex *= ST->getNumElements(); + CurrentType = ST->getElementType(Index); + } else if (auto *AT = dyn_cast(CurrentType)) { + OperandIndex *= AT->getNumElements(); + CurrentType = AT->getElementType(); + } else + return false; + OperandIndex += Index; + } + return true; + } +} + /// Recognize construction of vectors like /// %ra = insertelement <4 x float> undef, float %s0, i32 0 /// %rb = insertelement <4 x float> %ra, float %s1, i32 1 @@ -7082,53 +7142,53 @@ /// %rd = insertelement <4 x float> %rc, float %s3, i32 3 /// starting from the last insertelement or insertvalue instruction. /// -/// Also recognize aggregates like {<2 x float>, <2 x float>}, +/// Also recognize homogeneous aggregates like {<2 x float>, <2 x float>}, /// {{float, float}, {float, float}}, [2 x {float, float}] and so on. /// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples. /// /// Assume LastInsertInst is of InsertElementInst or InsertValueInst type. /// /// \return true if it matches. -static bool findBuildAggregate(Value *LastInsertInst, TargetTransformInfo *TTI, +static bool findBuildAggregate(Instruction *LastInsertInst, + TargetTransformInfo *TTI, SmallVectorImpl &BuildVectorOpds, - SmallVectorImpl &InsertElts) { + SmallVectorImpl &InsertElts, + unsigned int OperandOffset) { assert((isa(LastInsertInst) || isa(LastInsertInst)) && "Expected insertelement or insertvalue instruction!"); + + if (BuildVectorOpds.size() == 0) { + unsigned int AggregateSize; + if (!getAggregateSize(LastInsertInst, AggregateSize)) + return false; + BuildVectorOpds.resize(AggregateSize); + InsertElts.resize(AggregateSize); + } + do { - Value *InsertedOperand; - auto *IE = dyn_cast(LastInsertInst); - if (IE) { - InsertedOperand = IE->getOperand(1); - LastInsertInst = IE->getOperand(0); - } else { - auto *IV = cast(LastInsertInst); - InsertedOperand = IV->getInsertedValueOperand(); - LastInsertInst = IV->getAggregateOperand(); - } + Value *InsertedOperand = LastInsertInst->getOperand(1); + unsigned int OperandIndex = OperandOffset; + if (!getOperandIndex(LastInsertInst, OperandIndex)) + return false; if (isa(InsertedOperand) || isa(InsertedOperand)) { - SmallVector TmpBuildVectorOpds; - SmallVector TmpInsertElts; - if (!findBuildAggregate(InsertedOperand, TTI, TmpBuildVectorOpds, - TmpInsertElts)) + if (!findBuildAggregate(dyn_cast(InsertedOperand), TTI, + BuildVectorOpds, InsertElts, OperandIndex)) return false; - BuildVectorOpds.append(TmpBuildVectorOpds.rbegin(), - TmpBuildVectorOpds.rend()); - InsertElts.append(TmpInsertElts.rbegin(), TmpInsertElts.rend()); } else { - BuildVectorOpds.push_back(InsertedOperand); - InsertElts.push_back(IE); + BuildVectorOpds[OperandIndex] = InsertedOperand; + InsertElts[OperandIndex] = LastInsertInst; } - if (isa(LastInsertInst)) + if (isa(LastInsertInst->getOperand(0))) break; - if ((!isa(LastInsertInst) && + LastInsertInst = dyn_cast(LastInsertInst->getOperand(0)); + if (LastInsertInst == nullptr || + (!isa(LastInsertInst) && !isa(LastInsertInst)) || !LastInsertInst->hasOneUse()) return false; } while (true); - std::reverse(BuildVectorOpds.begin(), BuildVectorOpds.end()); - std::reverse(InsertElts.begin(), InsertElts.end()); return true; } @@ -7299,8 +7359,8 @@ SmallVector BuildVectorOpds; SmallVector BuildVectorInsts; - if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts) || - BuildVectorOpds.size() < 2) + if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts, 0) || + (!llvm::all_of(BuildVectorOpds, [](Value *V) { return V != nullptr; }))) return false; LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n"); @@ -7314,8 +7374,8 @@ BasicBlock *BB, BoUpSLP &R) { SmallVector BuildVectorInsts; SmallVector BuildVectorOpds; - if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) || - BuildVectorOpds.size() < 2 || + if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts, 0) || + (!llvm::all_of(BuildVectorOpds, [](Value *V) { return V != nullptr; })) || (llvm::all_of(BuildVectorOpds, [](Value *V) { return isa(V); }) && isShuffle(BuildVectorOpds))) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll b/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll @@ -37,11 +37,10 @@ define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) #0 { ; CHECK-LABEL: @reverse_hadd_v4f32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[A:%.*]], <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[A]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: ret <4 x float> [[TMP4]] +; CHECK-NEXT: ret <4 x float> [[TMP3]] ; %vecext = extractelement <4 x float> %a, i32 0 %vecext1 = extractelement <4 x float> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -203,8 +203,10 @@ ; ; MAX-COST-LABEL: @PR32038( ; MAX-COST-NEXT: entry: -; MAX-COST-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1 -; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer +; MAX-COST-NEXT: [[P0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 +; MAX-COST-NEXT: [[P1:%.*]] = icmp eq i8 [[P0]], 0 +; MAX-COST-NEXT: [[P2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 +; MAX-COST-NEXT: [[P3:%.*]] = icmp eq i8 [[P2]], 0 ; MAX-COST-NEXT: [[P4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 ; MAX-COST-NEXT: [[P5:%.*]] = icmp eq i8 [[P4]], 0 ; MAX-COST-NEXT: [[P6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 @@ -217,22 +219,20 @@ ; MAX-COST-NEXT: [[P13:%.*]] = icmp eq i8 [[P12]], 0 ; MAX-COST-NEXT: [[P14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 ; MAX-COST-NEXT: [[P15:%.*]] = icmp eq i8 [[P14]], 0 +; MAX-COST-NEXT: [[TMP0:%.*]] = insertelement <4 x i1> undef, i1 [[P1]], i32 0 +; MAX-COST-NEXT: [[TMP1:%.*]] = insertelement <4 x i1> [[TMP0]], i1 [[P3]], i32 1 +; MAX-COST-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> [[TMP1]], i1 [[P5]], i32 2 +; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> [[TMP2]], i1 [[P7]], i32 3 ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: ; MAX-COST-NEXT: [[P17:%.*]] = phi i32 [ [[P34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> undef, i1 [[TMP2]], i32 0 -; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 -; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[TMP4]], i32 1 -; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[P5]], i32 2 -; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[P7]], i32 3 -; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> , <4 x i32> +; MAX-COST-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> , <4 x i32> ; MAX-COST-NEXT: [[P27:%.*]] = select i1 [[P9]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[P29:%.*]] = select i1 [[P11]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) -; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[P27]] -; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[P29]] -; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP11]], -5 +; MAX-COST-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; MAX-COST-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[P27]] +; MAX-COST-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[P29]] +; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP7]], -5 ; MAX-COST-NEXT: [[P31:%.*]] = select i1 [[P13]], i32 -720, i32 -80 ; MAX-COST-NEXT: [[P32:%.*]] = add i32 [[OP_EXTRA]], [[P31]] ; MAX-COST-NEXT: [[P33:%.*]] = select i1 [[P15]], i32 -720, i32 -80 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll @@ -5,15 +5,13 @@ ; CHECK-LABEL: @_Z10fooConvertPDv4_xS0_S0_PKS_( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4 -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> undef, i32 5 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> undef, half [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0 -; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 -; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP7]], i32 5 +; CHECK-NEXT: [[CONV_I_4_I:%.*]] = fpext half [[TMP0]] to float +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[CONV_I_4_I]] to i32 +; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x half> undef, i32 5 +; CHECK-NEXT: [[CONV_I_5_I:%.*]] = fpext half [[TMP2]] to float +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[CONV_I_5_I]] to i32 +; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP3]], i32 5 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll @@ -282,23 +282,26 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: @sitofp_4i32_8i16( +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3 ; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 ; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 ; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x i16> [[B]], i32 2 ; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x i16> [[B]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +; CHECK-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float +; CHECK-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float +; CHECK-NEXT: [[AB2:%.*]] = sitofp i32 [[A2]] to float +; CHECK-NEXT: [[AB3:%.*]] = sitofp i32 [[A3]] to float ; CHECK-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float ; CHECK-NEXT: [[AB5:%.*]] = sitofp i16 [[B1]] to float ; CHECK-NEXT: [[AB6:%.*]] = sitofp i16 [[B2]] to float ; CHECK-NEXT: [[AB7:%.*]] = sitofp i16 [[B3]] to float -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 -; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2 -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3 +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3 ; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4 ; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 ; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 @@ -334,109 +337,32 @@ ; Inspired by PR38154 define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16 x i8> %c) { -; SSE-LABEL: @sitofp_uitofp_4i32_8i16_16i8( -; SSE-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1 -; SSE-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2 -; SSE-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3 -; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 -; SSE-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 -; SSE-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 -; SSE-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float -; SSE-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float -; SSE-NEXT: [[AB2:%.*]] = uitofp i32 [[A2]] to float -; SSE-NEXT: [[AB3:%.*]] = uitofp i32 [[A3]] to float -; SSE-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float -; SSE-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float -; SSE-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float -; SSE-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float -; SSE-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0 -; SSE-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1 -; SSE-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2 -; SSE-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3 -; SSE-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4 -; SSE-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 -; SSE-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 -; SSE-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 -; SSE-NEXT: ret <8 x float> [[R7]] -; -; SLM-LABEL: @sitofp_uitofp_4i32_8i16_16i8( -; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 -; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 -; SLM-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 -; SLM-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 -; SLM-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; SLM-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> -; SLM-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float -; SLM-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float -; SLM-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float -; SLM-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float -; SLM-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 -; SLM-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP3]], i32 0 -; SLM-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 1 -; SLM-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP4]], i32 1 -; SLM-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 -; SLM-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP5]], i32 2 -; SLM-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 -; SLM-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP6]], i32 3 -; SLM-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4 -; SLM-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 -; SLM-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 -; SLM-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 -; SLM-NEXT: ret <8 x float> [[R7]] -; -; AVX-LABEL: @sitofp_uitofp_4i32_8i16_16i8( -; AVX-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0 -; AVX-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1 -; AVX-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2 -; AVX-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3 -; AVX-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 -; AVX-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 -; AVX-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 -; AVX-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 -; AVX-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float -; AVX-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float -; AVX-NEXT: [[AB2:%.*]] = uitofp i32 [[A2]] to float -; AVX-NEXT: [[AB3:%.*]] = uitofp i32 [[A3]] to float -; AVX-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float -; AVX-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float -; AVX-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float -; AVX-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float -; AVX-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0 -; AVX-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1 -; AVX-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2 -; AVX-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3 -; AVX-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4 -; AVX-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 -; AVX-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 -; AVX-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 -; AVX-NEXT: ret <8 x float> [[R7]] -; -; AVX512-LABEL: @sitofp_uitofp_4i32_8i16_16i8( -; AVX512-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 -; AVX512-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 -; AVX512-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 -; AVX512-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 -; AVX512-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> -; AVX512-NEXT: [[TMP2:%.*]] = uitofp <4 x i32> [[A]] to <4 x float> -; AVX512-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float -; AVX512-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float -; AVX512-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float -; AVX512-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float -; AVX512-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 -; AVX512-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP3]], i32 0 -; AVX512-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 1 -; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP4]], i32 1 -; AVX512-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 -; AVX512-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP5]], i32 2 -; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 -; AVX512-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP6]], i32 3 -; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4 -; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 -; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 -; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 -; AVX512-NEXT: ret <8 x float> [[R7]] +; CHECK-LABEL: @sitofp_uitofp_4i32_8i16_16i8( +; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B:%.*]], i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 +; CHECK-NEXT: [[C0:%.*]] = extractelement <16 x i8> [[C:%.*]], i32 0 +; CHECK-NEXT: [[C1:%.*]] = extractelement <16 x i8> [[C]], i32 1 +; CHECK-NEXT: [[AB0:%.*]] = sitofp i32 [[A0]] to float +; CHECK-NEXT: [[AB1:%.*]] = sitofp i32 [[A1]] to float +; CHECK-NEXT: [[AB2:%.*]] = uitofp i32 [[A2]] to float +; CHECK-NEXT: [[AB3:%.*]] = uitofp i32 [[A3]] to float +; CHECK-NEXT: [[AB4:%.*]] = sitofp i16 [[B0]] to float +; CHECK-NEXT: [[AB5:%.*]] = uitofp i16 [[B1]] to float +; CHECK-NEXT: [[AB6:%.*]] = sitofp i8 [[C0]] to float +; CHECK-NEXT: [[AB7:%.*]] = uitofp i8 [[C1]] to float +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x float> undef, float [[AB0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[AB1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[AB2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[AB3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[AB4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[AB5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[AB6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[AB7]], i32 7 +; CHECK-NEXT: ret <8 x float> [[R7]] ; %a0 = extractelement <4 x i32> %a, i32 0 %a1 = extractelement <4 x i32> %a, i32 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll @@ -232,117 +232,40 @@ } define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) { -; SSE-LABEL: @ashr_lshr_shl_v8i32( -; SSE-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0 -; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1 -; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6 -; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 -; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0 -; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1 -; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6 -; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7 -; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]] -; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]] -; SSE-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]] -; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]] -; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]] -; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0 -; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1 -; SSE-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 -; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2 -; SSE-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 -; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3 -; SSE-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 -; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4 -; SSE-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 -; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5 -; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6 -; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7 -; SSE-NEXT: ret <8 x i32> [[R7]] -; -; AVX1-LABEL: @ashr_lshr_shl_v8i32( -; AVX1-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0 -; AVX1-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1 -; AVX1-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6 -; AVX1-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 -; AVX1-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0 -; AVX1-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1 -; AVX1-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6 -; AVX1-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7 -; AVX1-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]] -; AVX1-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]] -; AVX1-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]] -; AVX1-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]] -; AVX1-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]] -; AVX1-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0 -; AVX1-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1 -; AVX1-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 -; AVX1-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2 -; AVX1-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 -; AVX1-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3 -; AVX1-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 -; AVX1-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4 -; AVX1-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 -; AVX1-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5 -; AVX1-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6 -; AVX1-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7 -; AVX1-NEXT: ret <8 x i32> [[R7]] -; -; AVX2-LABEL: @ashr_lshr_shl_v8i32( -; AVX2-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6 -; AVX2-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 -; AVX2-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6 -; AVX2-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7 -; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> -; AVX2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] -; AVX2-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] -; AVX2-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[A]], [[B]] -; AVX2-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]] -; AVX2-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]] -; AVX2-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 -; AVX2-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 0 -; AVX2-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 -; AVX2-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP7]], i32 1 -; AVX2-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2 -; AVX2-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP8]], i32 2 -; AVX2-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 -; AVX2-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP9]], i32 3 -; AVX2-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4 -; AVX2-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP10]], i32 4 -; AVX2-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5 -; AVX2-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP11]], i32 5 -; AVX2-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6 -; AVX2-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7 -; AVX2-NEXT: ret <8 x i32> [[R7]] -; -; AVX512-LABEL: @ashr_lshr_shl_v8i32( -; AVX512-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 6 -; AVX512-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 -; AVX512-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 6 -; AVX512-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7 -; AVX512-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> undef, <4 x i32> -; AVX512-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], [[TMP2]] -; AVX512-NEXT: [[TMP4:%.*]] = lshr <4 x i32> [[TMP1]], [[TMP2]] -; AVX512-NEXT: [[TMP5:%.*]] = lshr <8 x i32> [[A]], [[B]] -; AVX512-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]] -; AVX512-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]] -; AVX512-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 -; AVX512-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 0 -; AVX512-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1 -; AVX512-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[TMP7]], i32 1 -; AVX512-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2 -; AVX512-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP8]], i32 2 -; AVX512-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 -; AVX512-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP9]], i32 3 -; AVX512-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP5]], i32 4 -; AVX512-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP10]], i32 4 -; AVX512-NEXT: [[TMP11:%.*]] = extractelement <8 x i32> [[TMP5]], i32 5 -; AVX512-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP11]], i32 5 -; AVX512-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6 -; AVX512-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7 -; AVX512-NEXT: ret <8 x i32> [[R7]] +; CHECK-LABEL: @ashr_lshr_shl_v8i32( +; CHECK-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0 +; CHECK-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1 +; CHECK-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2 +; CHECK-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3 +; CHECK-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4 +; CHECK-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5 +; CHECK-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6 +; CHECK-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7 +; CHECK-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0 +; CHECK-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1 +; CHECK-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2 +; CHECK-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3 +; CHECK-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4 +; CHECK-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5 +; CHECK-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6 +; CHECK-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7 +; CHECK-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]] +; CHECK-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]] +; CHECK-NEXT: [[AB2:%.*]] = lshr i32 [[A2]], [[B2]] +; CHECK-NEXT: [[AB3:%.*]] = lshr i32 [[A3]], [[B3]] +; CHECK-NEXT: [[AB4:%.*]] = lshr i32 [[A4]], [[B4]] +; CHECK-NEXT: [[AB5:%.*]] = lshr i32 [[A5]], [[B5]] +; CHECK-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]] +; CHECK-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]] +; CHECK-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0 +; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1 +; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2 +; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5 +; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6 +; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7 +; CHECK-NEXT: ret <8 x i32> [[R7]] ; %a0 = extractelement <8 x i32> %a, i32 0 %a1 = extractelement <8 x i32> %a, i32 1 @@ -425,10 +348,10 @@ ; CHECK-NEXT: [[AB5:%.*]] = sdiv i32 [[A5]], 4 ; CHECK-NEXT: [[AB6:%.*]] = sdiv i32 [[A6]], 8 ; CHECK-NEXT: [[AB7:%.*]] = sdiv i32 [[A7]], 16 -; CHECK-NEXT: [[R1:%.*]] = insertelement <8 x i32> undef, i32 [[AB1]], i32 1 -; CHECK-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2 -; CHECK-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3 -; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB5]], i32 5 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> undef, i32 [[AB1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[TMP1]], i32 [[AB2]], i32 2 +; CHECK-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[AB3]], i32 3 +; CHECK-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5 ; CHECK-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6 ; CHECK-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7 ; CHECK-NEXT: ret <8 x i32> [[R7]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -126,13 +126,16 @@ ; doesn't matter define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { ; ANY-LABEL: @simple_select_insert_out_of_order( -; ANY-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer -; ANY-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]] -; ANY-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 +; ANY-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> undef, <4 x i32> +; ANY-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> +; ANY-NEXT: [[REORDER_SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> +; ANY-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[REORDER_SHUFFLE]], zeroinitializer +; ANY-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[REORDER_SHUFFLE1]], <4 x float> [[REORDER_SHUFFLE2]] +; ANY-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 ; ANY-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2 ; ANY-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 ; ANY-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1 -; ANY-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 +; ANY-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 ; ANY-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 0 ; ANY-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 ; ANY-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3 @@ -227,30 +230,18 @@ ; ANY-NEXT: [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1 ; ANY-NEXT: [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2 ; ANY-NEXT: [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3 -; ANY-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0 -; ANY-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 -; ANY-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer -; ANY-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0 -; ANY-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1 -; ANY-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer -; ANY-NEXT: [[TMP7:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0 -; ANY-NEXT: [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1 -; ANY-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0 -; ANY-NEXT: [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1 -; ANY-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]] -; ANY-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0 -; ANY-NEXT: [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1 -; ANY-NEXT: [[TMP14:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0 -; ANY-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1 -; ANY-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]] -; ANY-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP11]], i32 0 -; ANY-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP17]], i32 0 -; ANY-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP11]], i32 1 -; ANY-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP18]], i32 1 -; ANY-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i32 0 -; ANY-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP19]], i32 2 -; ANY-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i32 1 -; ANY-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP20]], i32 3 +; ANY-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 +; ANY-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 +; ANY-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0 +; ANY-NEXT: [[CMP3:%.*]] = icmp ne i32 [[C3]], 0 +; ANY-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]] +; ANY-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] +; ANY-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]] +; ANY-NEXT: [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]] +; ANY-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0 +; ANY-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1 +; ANY-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[S2]], i32 2 +; ANY-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 ; ANY-NEXT: ret <4 x float> [[RD]] ; %c0 = extractelement <4 x i32> %c, i32 0 @@ -447,19 +438,19 @@ ; Make sure we handle multiple trees that feed one build vector correctly. define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) { ; ANY-LABEL: @multi_tree( -; ANY-NEXT: [[TMP1:%.*]] = insertelement <4 x double> undef, double [[W:%.*]], i32 0 -; ANY-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[X:%.*]], i32 1 -; ANY-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Y:%.*]], i32 2 -; ANY-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[Z:%.*]], i32 3 -; ANY-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], +; ANY-NEXT: [[TMP1:%.*]] = insertelement <4 x double> undef, double [[Z:%.*]], i32 0 +; ANY-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 1 +; ANY-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2 +; ANY-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3 +; ANY-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], ; ANY-NEXT: [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], -; ANY-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP6]], i32 0 +; ANY-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP6]], i32 3 ; ANY-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP7]], i32 3 -; ANY-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP6]], i32 1 +; ANY-NEXT: [[TMP8:%.*]] = extractelement <4 x double> [[TMP6]], i32 2 ; ANY-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP8]], i32 2 -; ANY-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP6]], i32 2 +; ANY-NEXT: [[TMP9:%.*]] = extractelement <4 x double> [[TMP6]], i32 1 ; ANY-NEXT: [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP9]], i32 1 -; ANY-NEXT: [[TMP10:%.*]] = extractelement <4 x double> [[TMP6]], i32 3 +; ANY-NEXT: [[TMP10:%.*]] = extractelement <4 x double> [[TMP6]], i32 0 ; ANY-NEXT: [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP10]], i32 0 ; ANY-NEXT: ret <4 x double> [[I4]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll @@ -54,13 +54,11 @@ ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 1 ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[GEP0]] to <2 x float>* -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[X2:%.*]] = load float, float* [[GEP2]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 -; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 -; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[TMP4]], i32 1 +; CHECK-NEXT: [[X0:%.*]] = load float, float* [[GEP0]], align 4 +; CHECK-NEXT: [[X1:%.*]] = load float, float* [[GEP1]], align 4 +; CHECK-NEXT: [[X2:%.*]] = load float, float* [[GEP2]], align 4 +; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x float> undef, float [[X0]], i32 0 +; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x float> [[I0]], float [[X1]], i32 1 ; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x float> [[I1]], float [[X2]], i32 2 ; CHECK-NEXT: [[I3:%.*]] = insertelement <4 x float> [[I2]], float [[X2]], i32 3 ; CHECK-NEXT: ret <4 x float> [[I3]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll @@ -151,8 +151,8 @@ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00 ; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]] ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVARS_IV]], 2 @@ -160,29 +160,28 @@ ; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[ARRAYIDX14]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>* -; CHECK-NEXT: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4 -; CHECK-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> undef, <2 x i32> -; CHECK-NEXT: [[TMP11]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 0 +; CHECK-NEXT: [[TMP9]] = load float, float* [[ARRAYIDX19]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw i64 [[INDVARS_IV]], 4 +; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP11]] = load float, float* [[ARRAYIDX24]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP11]], i32 0 -; CHECK-NEXT: [[TMP13]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP13]], i32 1 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP8]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP4]], i32 3 -; CHECK-NEXT: [[TMP17:%.*]] = fmul <4 x float> [[TMP16]], -; CHECK-NEXT: [[TMP18]] = fadd <4 x float> [[TMP6]], [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP19]], 121 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP9]], i32 1 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP4]], i32 3 +; CHECK-NEXT: [[TMP16:%.*]] = fmul <4 x float> [[TMP15]], +; CHECK-NEXT: [[TMP17]] = fadd <4 x float> [[TMP6]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP18]], 121 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP18]], i32 3 -; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP20]] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP18]], i32 2 -; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP21]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP18]], i32 1 -; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP22]] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP18]], i32 0 -; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP23]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x float> [[TMP17]], i32 3 +; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP19]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP17]], i32 2 +; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP17]], i32 1 +; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP17]], i32 0 +; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP22]] ; CHECK-NEXT: ret float [[ADD31]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll @@ -147,13 +147,13 @@ define {%StructTy, float, float} @NonHomogeneousStruct(float *%Ptr) { ; CHECK-LABEL: @NonHomogeneousStruct( ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds float, float* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[L0:%.*]] = load float, float* [[GEP0]] +; CHECK-NEXT: [[L0:%.*]] = load float, float* [[GEP0]], align 4 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 1 -; CHECK-NEXT: [[L1:%.*]] = load float, float* [[GEP1]] +; CHECK-NEXT: [[L1:%.*]] = load float, float* [[GEP1]], align 4 ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 2 -; CHECK-NEXT: [[L2:%.*]] = load float, float* [[GEP2]] +; CHECK-NEXT: [[L2:%.*]] = load float, float* [[GEP2]], align 4 ; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds float, float* [[PTR]], i64 3 -; CHECK-NEXT: [[L3:%.*]] = load float, float* [[GEP3]] +; CHECK-NEXT: [[L3:%.*]] = load float, float* [[GEP3]], align 4 ; CHECK-NEXT: [[FADD0:%.*]] = fadd fast float [[L0]], 1.100000e+01 ; CHECK-NEXT: [[FADD1:%.*]] = fadd fast float [[L1]], 1.200000e+01 ; CHECK-NEXT: [[FADD2:%.*]] = fadd fast float [[L2]], 1.300000e+01 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll @@ -93,8 +93,12 @@ define <4 x double> @constant_folding() { ; CHECK-LABEL: @constant_folding( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double 1.000000e+00, i32 1 -; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double 2.000000e+00, i32 0 +; CHECK-NEXT: [[T0:%.*]] = fadd double 1.000000e+00, 0.000000e+00 +; CHECK-NEXT: [[T1:%.*]] = fadd double 1.000000e+00, 1.000000e+00 +; CHECK-NEXT: [[T2:%.*]] = fmul double [[T0]], 1.000000e+00 +; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[T2]], i32 1 +; CHECK-NEXT: [[T3:%.*]] = fmul double [[T1]], 1.000000e+00 +; CHECK-NEXT: [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[T3]], i32 0 ; CHECK-NEXT: ret <4 x double> [[I2]] ; entry: