diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -907,6 +907,29 @@
   void buildExternalUses(const ExtraValueToDebugLocsMap &ExternallyUsedValues = {});
 
+  /// Saves the scalars of horizontal reductions that failed to vectorize only
+  /// because of mutual extraction cost.
+  bool cacheVectorizableValues() {
+    bool Changed = false;
+    for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
+      TreeEntry &TE = *VectorizableTree[I];
+      for (int Lane = 0, LE = TE.Scalars.size(); Lane != LE; ++Lane) {
+        Value *Scalar = TE.Scalars[Lane];
+        for (User *U : Scalar->users()) {
+          Instruction *UserInst = dyn_cast<Instruction>(U);
+          if (UserInst && !isDeleted(UserInst) &&
+              ((UserIgnoreList && UserIgnoreList->contains(U)) ||
+               getTreeEntry(U) == &TE))
+            Changed |= VectorizableValues[Scalar].insert(U).second;
+        }
+      }
+    }
+    return Changed;
+  }
+
+  void eraseVectorizableValue(Value *V) { VectorizableValues.erase(V); }
+
+  bool foundVectorizableValues() { return FoundVectorizableValues; }
+
   /// Clear the internal data structures that are created by 'buildTree'.
   void deleteTree() {
     VectorizableTree.clear();
@@ -2618,8 +2641,8 @@
   /// This POD struct describes one external user in the vectorized tree.
   struct ExternalUser {
-    ExternalUser(Value *S, llvm::User *U, int L)
-        : Scalar(S), User(U), Lane(L) {}
+    ExternalUser(Value *S, llvm::User *U, int L, bool SC = false)
+        : Scalar(S), User(U), Lane(L), SkipCost(SC) {}
 
     // Which scalar in our function.
     Value *Scalar;
@@ -2629,6 +2652,8 @@
 
     // Which lane does the scalar belong to.
     int Lane;
+
+    bool SkipCost;
   };
 
   using UserList = SmallVector<ExternalUser, 16>;
@@ -2682,6 +2707,10 @@
   /// after vectorization.
   UserList ExternalUses;
 
+  DenseMap<Value *, SmallPtrSet<User *, 4>> VectorizableValues;
+
+  bool FoundVectorizableValues;
+
   /// Values used only by @llvm.assume calls.
   SmallPtrSet<const Value *, 32> EphValues;
@@ -4280,7 +4309,15 @@
         LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
                           << Lane << " from " << *Scalar << ".\n");
-        ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane));
+
+        bool SkipCost =
+            VectorizableValues[Scalar].contains(U) &&
+            all_of(VectorizableValues[Scalar], [this, Entry, U](User *Usr) {
+              auto *UserInst = dyn_cast<Instruction>(Usr);
+              return !UserInst || isDeleted(UserInst) || Usr == U ||
+                     (UserIgnoreList && UserIgnoreList->contains(Usr)) ||
+                     getTreeEntry(Usr) == Entry;
+            });
+        ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane, SkipCost));
       }
     }
   }
@@ -7083,6 +7120,9 @@
   SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
   SmallVector<APInt> DemandedElts;
   for (ExternalUser &EU : ExternalUses) {
+    if (EU.SkipCost)
+      continue;
+
     // We only add extract cost once for the same scalar.
     if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
         !ExtractCostCalculated.insert(EU.Scalar).second)
       continue;
@@ -7260,6 +7300,10 @@
     Cost -= InsertCost;
   }
 
+  FoundVectorizableValues =
+      Cost >= -SLPCostThreshold && Cost - ExtractCost < -SLPCostThreshold
+          ? cacheVectorizableValues()
+          : false;
+
 #ifndef NDEBUG
   SmallString<256> Str;
   {
@@ -8924,6 +8968,7 @@
 #endif
       LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
       eraseInstruction(cast<Instruction>(Scalar));
+      eraseVectorizableValue(Scalar);
     }
   }
@@ -11271,6 +11316,8 @@
                    << " and threshold "
                    << ore::NV("Threshold", -SLPCostThreshold);
           });
+          if (V.foundVectorizableValues())
+            return ReductionRoot;
          if (!AdjustReducedVals())
            V.analyzedReductionVals(VL);
          continue;
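
Note: the test below exercises the pattern this change targets: two interleaved
horizontal reductions (a plain sum and a sum of squares) over the same 8x8 block
of i16 loads. A rough C/C++ equivalent of the test's @straight function,
reconstructed from its CHECK lines (the loop form and names are illustrative
assumptions; the IR in the test is fully unrolled):

    #include <stdint.h>

    uint64_t straight(const uint16_t *p, int st) {
      uint32_t sum = 0;    // first horizontal reduction
      uint32_t sqsum = 0;  // second reduction over the same loaded values
      for (int i = 0; i < 8; i++, p += st)   // 8 rows, stride st (idx.ext)
        for (int j = 0; j < 8; j++) {        // 8 lanes per row
          sum += p[j];
          sqsum += (uint32_t)p[j] * p[j];
        }
      // Matches the tail of the test: pack both results into one i64.
      return ((uint64_t)sqsum << 32) | sum;
    }

Costed in isolation, each reduction looks unprofitable because every load would
seem to require an extractelement for the *other* reduction; with the cached
VectorizableValues, the retry can skip that extraction cost and both reductions
vectorize over one set of <8 x i16> loads.
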
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
@@ -14,389 +14,50 @@
 ; CHECK-LABEL: @straight(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[ST:%.*]] to i64
-; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[P:%.*]], align 2
-; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP0]] to i32
-; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 1
-; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX_1]], align 2
-; CHECK-NEXT: [[CONV_1:%.*]] = zext i16 [[TMP1]] to i32
-; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[CONV]], [[CONV_1]]
-; CHECK-NEXT: [[MUL_1:%.*]] = mul nuw nsw i32 [[CONV_1]], [[CONV_1]]
-; CHECK-NEXT: [[ADD11_1:%.*]] = add nuw i32 [[MUL_1]], [[MUL]]
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 2
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX_2]], align 2
-; CHECK-NEXT: [[CONV_2:%.*]] = zext i16 [[TMP2]] to i32
-; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], [[CONV_2]]
-; CHECK-NEXT: [[MUL_2:%.*]] = mul nuw nsw i32 [[CONV_2]], [[CONV_2]]
-; CHECK-NEXT: [[ADD11_2:%.*]] = add i32 [[MUL_2]], [[ADD11_1]]
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 3
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX_3]], align 2
-; CHECK-NEXT: [[CONV_3:%.*]] = zext i16 [[TMP3]] to i32
-; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i32 [[ADD_2]], [[CONV_3]]
-; CHECK-NEXT: [[MUL_3:%.*]] = mul nuw nsw i32 [[CONV_3]], [[CONV_3]]
-; CHECK-NEXT: [[ADD11_3:%.*]] = add i32 [[MUL_3]], [[ADD11_2]]
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 4
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_4]], align 2
-; CHECK-NEXT: [[CONV_4:%.*]] = zext i16 [[TMP4]] to i32
-; CHECK-NEXT: [[ADD_4:%.*]] = add nuw nsw i32 [[ADD_3]], [[CONV_4]]
-; CHECK-NEXT: [[MUL_4:%.*]] = mul nuw nsw i32 [[CONV_4]], [[CONV_4]]
-; CHECK-NEXT: [[ADD11_4:%.*]] = add i32 [[MUL_4]], [[ADD11_3]]
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 5
-; CHECK-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX_5]], align 2
-; CHECK-NEXT: [[CONV_5:%.*]] = zext i16 [[TMP5]] to i32
-; CHECK-NEXT: [[ADD_5:%.*]] = add nuw nsw i32 [[ADD_4]], [[CONV_5]]
-; CHECK-NEXT: [[MUL_5:%.*]] = mul nuw nsw i32 [[CONV_5]], [[CONV_5]]
-; CHECK-NEXT: [[ADD11_5:%.*]] = add i32 [[MUL_5]], [[ADD11_4]]
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 6
-; CHECK-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_6]], align 2
-; CHECK-NEXT: [[CONV_6:%.*]] = zext i16 [[TMP6]] to i32
-; CHECK-NEXT: [[ADD_6:%.*]] = add nuw nsw i32 [[ADD_5]], [[CONV_6]]
-; CHECK-NEXT: [[MUL_6:%.*]] = mul nuw nsw i32 [[CONV_6]], [[CONV_6]]
-; CHECK-NEXT: [[ADD11_6:%.*]] = add i32 [[MUL_6]], [[ADD11_5]]
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 7
-; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX_7]], align 2
-; CHECK-NEXT: [[CONV_7:%.*]] = zext i16 [[TMP7]] to i32
-; CHECK-NEXT: [[ADD_7:%.*]] = add nuw nsw i32 [[ADD_6]], [[CONV_7]]
-; CHECK-NEXT: [[MUL_7:%.*]] = mul nuw nsw i32 [[CONV_7]], [[CONV_7]]
-; CHECK-NEXT: [[ADD11_7:%.*]] = add i32 [[MUL_7]], [[ADD11_6]]
-; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[ADD_PTR]], align 2
-; CHECK-NEXT: [[CONV_140:%.*]] = zext i16 [[TMP8]] to i32
-; CHECK-NEXT: [[ADD_141:%.*]] = add nuw nsw i32 [[ADD_7]], [[CONV_140]]
-; CHECK-NEXT: [[MUL_142:%.*]] = mul nuw nsw i32 [[CONV_140]], [[CONV_140]]
-; CHECK-NEXT: [[ADD11_143:%.*]] = add i32 [[MUL_142]], [[ADD11_7]]
-; CHECK-NEXT: [[ARRAYIDX_1_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 1
-; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX_1_1]], align 2
-; CHECK-NEXT: [[CONV_1_1:%.*]] = zext i16 [[TMP9]] to i32
-; CHECK-NEXT: [[ADD_1_1:%.*]] = add nuw nsw i32 [[ADD_141]], [[CONV_1_1]]
-; CHECK-NEXT: [[MUL_1_1:%.*]] = mul nuw nsw i32 [[CONV_1_1]], [[CONV_1_1]]
-; CHECK-NEXT: [[ADD11_1_1:%.*]] = add i32 [[MUL_1_1]], [[ADD11_143]]
-; CHECK-NEXT: [[ARRAYIDX_2_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 2
-; CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_2_1]], align 2
-; CHECK-NEXT: [[CONV_2_1:%.*]] = zext i16 [[TMP10]] to i32
-; CHECK-NEXT: [[ADD_2_1:%.*]] = add nuw nsw i32 [[ADD_1_1]], [[CONV_2_1]]
-; CHECK-NEXT: [[MUL_2_1:%.*]] = mul nuw nsw i32 [[CONV_2_1]], [[CONV_2_1]]
-; CHECK-NEXT: [[ADD11_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD11_1_1]]
-; CHECK-NEXT: [[ARRAYIDX_3_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX_3_1]], align 2
-; CHECK-NEXT: [[CONV_3_1:%.*]] = zext i16 [[TMP11]] to i32
-; CHECK-NEXT: [[ADD_3_1:%.*]] = add nuw nsw i32 [[ADD_2_1]], [[CONV_3_1]]
-; CHECK-NEXT: [[MUL_3_1:%.*]] = mul nuw nsw i32 [[CONV_3_1]], [[CONV_3_1]]
-; CHECK-NEXT: [[ADD11_3_1:%.*]] = add i32 [[MUL_3_1]], [[ADD11_2_1]]
-; CHECK-NEXT: [[ARRAYIDX_4_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 4
-; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX_4_1]], align 2
-; CHECK-NEXT: [[CONV_4_1:%.*]] = zext i16 [[TMP12]] to i32
-; CHECK-NEXT: [[ADD_4_1:%.*]] = add nuw nsw i32 [[ADD_3_1]], [[CONV_4_1]]
-; CHECK-NEXT: [[MUL_4_1:%.*]] = mul nuw nsw i32 [[CONV_4_1]], [[CONV_4_1]]
-; CHECK-NEXT: [[ADD11_4_1:%.*]] = add i32 [[MUL_4_1]], [[ADD11_3_1]]
-; CHECK-NEXT: [[ARRAYIDX_5_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 5
-; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX_5_1]], align 2
-; CHECK-NEXT: [[CONV_5_1:%.*]] = zext i16 [[TMP13]] to i32
-; CHECK-NEXT: [[ADD_5_1:%.*]] = add nuw nsw i32 [[ADD_4_1]], [[CONV_5_1]]
-; CHECK-NEXT: [[MUL_5_1:%.*]] = mul nuw nsw i32 [[CONV_5_1]], [[CONV_5_1]]
-; CHECK-NEXT: [[ADD11_5_1:%.*]] = add i32 [[MUL_5_1]], [[ADD11_4_1]]
-; CHECK-NEXT: [[ARRAYIDX_6_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 6
-; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX_6_1]], align 2
-; CHECK-NEXT: [[CONV_6_1:%.*]] = zext i16 [[TMP14]] to i32
-; CHECK-NEXT: [[ADD_6_1:%.*]] = add nuw nsw i32 [[ADD_5_1]], [[CONV_6_1]]
-; CHECK-NEXT: [[MUL_6_1:%.*]] = mul nuw nsw i32 [[CONV_6_1]], [[CONV_6_1]]
-; CHECK-NEXT: [[ADD11_6_1:%.*]] = add i32 [[MUL_6_1]], [[ADD11_5_1]]
-; CHECK-NEXT: [[ARRAYIDX_7_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 7
-; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX_7_1]], align 2
-; CHECK-NEXT: [[CONV_7_1:%.*]] = zext i16 [[TMP15]] to i32
-; CHECK-NEXT: [[ADD_7_1:%.*]] = add nuw nsw i32 [[ADD_6_1]], [[CONV_7_1]]
-; CHECK-NEXT: [[MUL_7_1:%.*]] = mul nuw nsw i32 [[CONV_7_1]], [[CONV_7_1]]
-; CHECK-NEXT: [[ADD11_7_1:%.*]] = add i32 [[MUL_7_1]], [[ADD11_6_1]]
+; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IDX_EXT]]
 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP16:%.*]] = load i16, i16* [[ADD_PTR_1]], align 2
-; CHECK-NEXT: [[CONV_244:%.*]] = zext i16 [[TMP16]] to i32
-; CHECK-NEXT: [[ADD_245:%.*]] = add nuw nsw i32 [[ADD_7_1]], [[CONV_244]]
-; CHECK-NEXT: [[MUL_246:%.*]] = mul nuw nsw i32 [[CONV_244]], [[CONV_244]]
-; CHECK-NEXT: [[ADD11_247:%.*]] = add i32 [[MUL_246]], [[ADD11_7_1]]
-; CHECK-NEXT: [[ARRAYIDX_1_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 1
-; CHECK-NEXT: [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX_1_2]], align 2
-; CHECK-NEXT: [[CONV_1_2:%.*]] = zext i16 [[TMP17]] to i32
-; CHECK-NEXT: [[ADD_1_2:%.*]] = add nuw nsw i32 [[ADD_245]], [[CONV_1_2]]
-; CHECK-NEXT: [[MUL_1_2:%.*]] = mul nuw nsw i32 [[CONV_1_2]], [[CONV_1_2]]
-; CHECK-NEXT: [[ADD11_1_2:%.*]] = add i32 [[MUL_1_2]], [[ADD11_247]]
-; CHECK-NEXT: [[ARRAYIDX_2_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 2
-; CHECK-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX_2_2]], align 2
-; CHECK-NEXT: [[CONV_2_2:%.*]] = zext i16 [[TMP18]] to i32
-; CHECK-NEXT: [[ADD_2_2:%.*]] = add nuw nsw i32 [[ADD_1_2]], [[CONV_2_2]]
-; CHECK-NEXT: [[MUL_2_2:%.*]] = mul nuw nsw i32 [[CONV_2_2]], [[CONV_2_2]]
-; CHECK-NEXT: [[ADD11_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD11_1_2]]
-; CHECK-NEXT: [[ARRAYIDX_3_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 3
-; CHECK-NEXT: [[TMP19:%.*]] = load i16, i16* [[ARRAYIDX_3_2]], align 2
-; CHECK-NEXT: [[CONV_3_2:%.*]] = zext i16 [[TMP19]] to i32
-; CHECK-NEXT: [[ADD_3_2:%.*]] = add nuw nsw i32 [[ADD_2_2]], [[CONV_3_2]]
-; CHECK-NEXT: [[MUL_3_2:%.*]] = mul nuw nsw i32 [[CONV_3_2]], [[CONV_3_2]]
-; CHECK-NEXT: [[ADD11_3_2:%.*]] = add i32 [[MUL_3_2]], [[ADD11_2_2]]
-; CHECK-NEXT: [[ARRAYIDX_4_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 4
-; CHECK-NEXT: [[TMP20:%.*]] = load i16, i16* [[ARRAYIDX_4_2]], align 2
-; CHECK-NEXT: [[CONV_4_2:%.*]] = zext i16 [[TMP20]] to i32
-; CHECK-NEXT: [[ADD_4_2:%.*]] = add nuw nsw i32 [[ADD_3_2]], [[CONV_4_2]]
-; CHECK-NEXT: [[MUL_4_2:%.*]] = mul nuw nsw i32 [[CONV_4_2]], [[CONV_4_2]]
-; CHECK-NEXT: [[ADD11_4_2:%.*]] = add i32 [[MUL_4_2]], [[ADD11_3_2]]
-; CHECK-NEXT: [[ARRAYIDX_5_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX_5_2]], align 2
-; CHECK-NEXT: [[CONV_5_2:%.*]] = zext i16 [[TMP21]] to i32
-; CHECK-NEXT: [[ADD_5_2:%.*]] = add nuw nsw i32 [[ADD_4_2]], [[CONV_5_2]]
-; CHECK-NEXT: [[MUL_5_2:%.*]] = mul nuw nsw i32 [[CONV_5_2]], [[CONV_5_2]]
-; CHECK-NEXT: [[ADD11_5_2:%.*]] = add i32 [[MUL_5_2]], [[ADD11_4_2]]
-; CHECK-NEXT: [[ARRAYIDX_6_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 6
-; CHECK-NEXT: [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX_6_2]], align 2
-; CHECK-NEXT: [[CONV_6_2:%.*]] = zext i16 [[TMP22]] to i32
-; CHECK-NEXT: [[ADD_6_2:%.*]] = add nuw nsw i32 [[ADD_5_2]], [[CONV_6_2]]
-; CHECK-NEXT: [[MUL_6_2:%.*]] = mul nuw nsw i32 [[CONV_6_2]], [[CONV_6_2]]
-; CHECK-NEXT: [[ADD11_6_2:%.*]] = add i32 [[MUL_6_2]], [[ADD11_5_2]]
-; CHECK-NEXT: [[ARRAYIDX_7_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 7
-; CHECK-NEXT: [[TMP23:%.*]] = load i16, i16* [[ARRAYIDX_7_2]], align 2
-; CHECK-NEXT: [[CONV_7_2:%.*]] = zext i16 [[TMP23]] to i32
-; CHECK-NEXT: [[ADD_7_2:%.*]] = add nuw nsw i32 [[ADD_6_2]], [[CONV_7_2]]
-; CHECK-NEXT: [[MUL_7_2:%.*]] = mul nuw nsw i32 [[CONV_7_2]], [[CONV_7_2]]
-; CHECK-NEXT: [[ADD11_7_2:%.*]] = add i32 [[MUL_7_2]], [[ADD11_6_2]]
 ; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP24:%.*]] = load i16, i16* [[ADD_PTR_2]], align 2
-; CHECK-NEXT: [[CONV_348:%.*]] = zext i16 [[TMP24]] to i32
-; CHECK-NEXT: [[ADD_349:%.*]] = add nuw nsw i32 [[ADD_7_2]], [[CONV_348]]
-; CHECK-NEXT: [[MUL_350:%.*]] = mul nuw nsw i32 [[CONV_348]], [[CONV_348]]
-; CHECK-NEXT: [[ADD11_351:%.*]] = add i32 [[MUL_350]], [[ADD11_7_2]]
-; CHECK-NEXT: [[ARRAYIDX_1_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 1
-; CHECK-NEXT: [[TMP25:%.*]] = load i16, i16* [[ARRAYIDX_1_3]], align 2
-; CHECK-NEXT: [[CONV_1_3:%.*]] = zext i16 [[TMP25]] to i32
-; CHECK-NEXT: [[ADD_1_3:%.*]] = add nuw nsw i32 [[ADD_349]], [[CONV_1_3]]
-; CHECK-NEXT: [[MUL_1_3:%.*]] = mul nuw nsw i32 [[CONV_1_3]], [[CONV_1_3]]
-; CHECK-NEXT: [[ADD11_1_3:%.*]] = add i32 [[MUL_1_3]], [[ADD11_351]]
-; CHECK-NEXT: [[ARRAYIDX_2_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 2
-; CHECK-NEXT: [[TMP26:%.*]] = load i16, i16* [[ARRAYIDX_2_3]], align 2
-; CHECK-NEXT: [[CONV_2_3:%.*]] = zext i16 [[TMP26]] to i32
-; CHECK-NEXT: [[ADD_2_3:%.*]] = add nuw nsw i32 [[ADD_1_3]], [[CONV_2_3]]
-; CHECK-NEXT: [[MUL_2_3:%.*]] = mul nuw nsw i32 [[CONV_2_3]], [[CONV_2_3]]
-; CHECK-NEXT: [[ADD11_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD11_1_3]]
-; CHECK-NEXT: [[ARRAYIDX_3_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 3
-; CHECK-NEXT: [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX_3_3]], align 2
-; CHECK-NEXT: [[CONV_3_3:%.*]] = zext i16 [[TMP27]] to i32
-; CHECK-NEXT: [[ADD_3_3:%.*]] = add nuw nsw i32 [[ADD_2_3]], [[CONV_3_3]]
-; CHECK-NEXT: [[MUL_3_3:%.*]] = mul nuw nsw i32 [[CONV_3_3]], [[CONV_3_3]]
-; CHECK-NEXT: [[ADD11_3_3:%.*]] = add i32 [[MUL_3_3]], [[ADD11_2_3]]
-; CHECK-NEXT: [[ARRAYIDX_4_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 4
-; CHECK-NEXT: [[TMP28:%.*]] = load i16, i16* [[ARRAYIDX_4_3]], align 2
-; CHECK-NEXT: [[CONV_4_3:%.*]] = zext i16 [[TMP28]] to i32
-; CHECK-NEXT: [[ADD_4_3:%.*]] = add nuw nsw i32 [[ADD_3_3]], [[CONV_4_3]]
-; CHECK-NEXT: [[MUL_4_3:%.*]] = mul nuw nsw i32 [[CONV_4_3]], [[CONV_4_3]]
-; CHECK-NEXT: [[ADD11_4_3:%.*]] = add i32 [[MUL_4_3]], [[ADD11_3_3]]
-; CHECK-NEXT: [[ARRAYIDX_5_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 5
-; CHECK-NEXT: [[TMP29:%.*]] = load i16, i16* [[ARRAYIDX_5_3]], align 2
-; CHECK-NEXT: [[CONV_5_3:%.*]] = zext i16 [[TMP29]] to i32
-; CHECK-NEXT: [[ADD_5_3:%.*]] = add nuw nsw i32 [[ADD_4_3]], [[CONV_5_3]]
-; CHECK-NEXT: [[MUL_5_3:%.*]] = mul nuw nsw i32 [[CONV_5_3]], [[CONV_5_3]]
-; CHECK-NEXT: [[ADD11_5_3:%.*]] = add i32 [[MUL_5_3]], [[ADD11_4_3]]
-; CHECK-NEXT: [[ARRAYIDX_6_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 6
-; CHECK-NEXT: [[TMP30:%.*]] = load i16, i16* [[ARRAYIDX_6_3]], align 2
-; CHECK-NEXT: [[CONV_6_3:%.*]] = zext i16 [[TMP30]] to i32
-; CHECK-NEXT: [[ADD_6_3:%.*]] = add nuw nsw i32 [[ADD_5_3]], [[CONV_6_3]]
-; CHECK-NEXT: [[MUL_6_3:%.*]] = mul nuw nsw i32 [[CONV_6_3]], [[CONV_6_3]]
-; CHECK-NEXT: [[ADD11_6_3:%.*]] = add i32 [[MUL_6_3]], [[ADD11_5_3]]
-; CHECK-NEXT: [[ARRAYIDX_7_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 7
-; CHECK-NEXT: [[TMP31:%.*]] = load i16, i16* [[ARRAYIDX_7_3]], align 2
-; CHECK-NEXT: [[CONV_7_3:%.*]] = zext i16 [[TMP31]] to i32
-; CHECK-NEXT: [[ADD_7_3:%.*]] = add nuw nsw i32 [[ADD_6_3]], [[CONV_7_3]]
-; CHECK-NEXT: [[MUL_7_3:%.*]] = mul nuw nsw i32 [[CONV_7_3]], [[CONV_7_3]]
-; CHECK-NEXT: [[ADD11_7_3:%.*]] = add i32 [[MUL_7_3]], [[ADD11_6_3]]
 ; CHECK-NEXT: [[ADD_PTR_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP32:%.*]] = load i16, i16* [[ADD_PTR_3]], align 2
-; CHECK-NEXT: [[CONV_452:%.*]] = zext i16 [[TMP32]] to i32
-; CHECK-NEXT: [[ADD_453:%.*]] = add nuw nsw i32 [[ADD_7_3]], [[CONV_452]]
-; CHECK-NEXT: [[MUL_454:%.*]] = mul nuw nsw i32 [[CONV_452]], [[CONV_452]]
-; CHECK-NEXT: [[ADD11_455:%.*]] = add i32 [[MUL_454]], [[ADD11_7_3]]
-; CHECK-NEXT: [[ARRAYIDX_1_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 1
-; CHECK-NEXT: [[TMP33:%.*]] = load i16, i16* [[ARRAYIDX_1_4]], align 2
-; CHECK-NEXT: [[CONV_1_4:%.*]] = zext i16 [[TMP33]] to i32
-; CHECK-NEXT: [[ADD_1_4:%.*]] = add nuw nsw i32 [[ADD_453]], [[CONV_1_4]]
-; CHECK-NEXT: [[MUL_1_4:%.*]] = mul nuw nsw i32 [[CONV_1_4]], [[CONV_1_4]]
-; CHECK-NEXT: [[ADD11_1_4:%.*]] = add i32 [[MUL_1_4]], [[ADD11_455]]
-; CHECK-NEXT: [[ARRAYIDX_2_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 2
-; CHECK-NEXT: [[TMP34:%.*]] = load i16, i16* [[ARRAYIDX_2_4]], align 2
-; CHECK-NEXT: [[CONV_2_4:%.*]] = zext i16 [[TMP34]] to i32
-; CHECK-NEXT: [[ADD_2_4:%.*]] = add nuw nsw i32 [[ADD_1_4]], [[CONV_2_4]]
-; CHECK-NEXT: [[MUL_2_4:%.*]] = mul nuw nsw i32 [[CONV_2_4]], [[CONV_2_4]]
-; CHECK-NEXT: [[ADD11_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD11_1_4]]
-; CHECK-NEXT: [[ARRAYIDX_3_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 3
-; CHECK-NEXT: [[TMP35:%.*]] = load i16, i16* [[ARRAYIDX_3_4]], align 2
-; CHECK-NEXT: [[CONV_3_4:%.*]] = zext i16 [[TMP35]] to i32
-; CHECK-NEXT: [[ADD_3_4:%.*]] = add nuw nsw i32 [[ADD_2_4]], [[CONV_3_4]]
-; CHECK-NEXT: [[MUL_3_4:%.*]] = mul nuw nsw i32 [[CONV_3_4]], [[CONV_3_4]]
-; CHECK-NEXT: [[ADD11_3_4:%.*]] = add i32 [[MUL_3_4]], [[ADD11_2_4]]
-; CHECK-NEXT: [[ARRAYIDX_4_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 4
-; CHECK-NEXT: [[TMP36:%.*]] = load i16, i16* [[ARRAYIDX_4_4]], align 2
-; CHECK-NEXT: [[CONV_4_4:%.*]] = zext i16 [[TMP36]] to i32
-; CHECK-NEXT: [[ADD_4_4:%.*]] = add nuw nsw i32 [[ADD_3_4]], [[CONV_4_4]]
-; CHECK-NEXT: [[MUL_4_4:%.*]] = mul nuw nsw i32 [[CONV_4_4]], [[CONV_4_4]]
-; CHECK-NEXT: [[ADD11_4_4:%.*]] = add i32 [[MUL_4_4]], [[ADD11_3_4]]
-; CHECK-NEXT: [[ARRAYIDX_5_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 5
-; CHECK-NEXT: [[TMP37:%.*]] = load i16, i16* [[ARRAYIDX_5_4]], align 2
-; CHECK-NEXT: [[CONV_5_4:%.*]] = zext i16 [[TMP37]] to i32
-; CHECK-NEXT: [[ADD_5_4:%.*]] = add nuw nsw i32 [[ADD_4_4]], [[CONV_5_4]]
-; CHECK-NEXT: [[MUL_5_4:%.*]] = mul nuw nsw i32 [[CONV_5_4]], [[CONV_5_4]]
-; CHECK-NEXT: [[ADD11_5_4:%.*]] = add i32 [[MUL_5_4]], [[ADD11_4_4]]
-; CHECK-NEXT: [[ARRAYIDX_6_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 6
-; CHECK-NEXT: [[TMP38:%.*]] = load i16, i16* [[ARRAYIDX_6_4]], align 2
-; CHECK-NEXT: [[CONV_6_4:%.*]] = zext i16 [[TMP38]] to i32
-; CHECK-NEXT: [[ADD_6_4:%.*]] = add nuw nsw i32 [[ADD_5_4]], [[CONV_6_4]]
-; CHECK-NEXT: [[MUL_6_4:%.*]] = mul nuw nsw i32 [[CONV_6_4]], [[CONV_6_4]]
-; CHECK-NEXT: [[ADD11_6_4:%.*]] = add i32 [[MUL_6_4]], [[ADD11_5_4]]
-; CHECK-NEXT: [[ARRAYIDX_7_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 7
-; CHECK-NEXT: [[TMP39:%.*]] = load i16, i16* [[ARRAYIDX_7_4]], align 2
-; CHECK-NEXT: [[CONV_7_4:%.*]] = zext i16 [[TMP39]] to i32
-; CHECK-NEXT: [[ADD_7_4:%.*]] = add nuw nsw i32 [[ADD_6_4]], [[CONV_7_4]]
-; CHECK-NEXT: [[MUL_7_4:%.*]] = mul nuw nsw i32 [[CONV_7_4]], [[CONV_7_4]]
-; CHECK-NEXT: [[ADD11_7_4:%.*]] = add i32 [[MUL_7_4]], [[ADD11_6_4]]
 ; CHECK-NEXT: [[ADD_PTR_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP40:%.*]] = load i16, i16* [[ADD_PTR_4]], align 2
-; CHECK-NEXT: [[CONV_556:%.*]] = zext i16 [[TMP40]] to i32
-; CHECK-NEXT: [[ADD_557:%.*]] = add nuw nsw i32 [[ADD_7_4]], [[CONV_556]]
-; CHECK-NEXT: [[MUL_558:%.*]] = mul nuw nsw i32 [[CONV_556]], [[CONV_556]]
-; CHECK-NEXT: [[ADD11_559:%.*]] = add i32 [[MUL_558]], [[ADD11_7_4]]
-; CHECK-NEXT: [[ARRAYIDX_1_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 1
-; CHECK-NEXT: [[TMP41:%.*]] = load i16, i16* [[ARRAYIDX_1_5]], align 2
-; CHECK-NEXT: [[CONV_1_5:%.*]] = zext i16 [[TMP41]] to i32
-; CHECK-NEXT: [[ADD_1_5:%.*]] = add nuw nsw i32 [[ADD_557]], [[CONV_1_5]]
-; CHECK-NEXT: [[MUL_1_5:%.*]] = mul nuw nsw i32 [[CONV_1_5]], [[CONV_1_5]]
-; CHECK-NEXT: [[ADD11_1_5:%.*]] = add i32 [[MUL_1_5]], [[ADD11_559]]
-; CHECK-NEXT: [[ARRAYIDX_2_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 2
-; CHECK-NEXT: [[TMP42:%.*]] = load i16, i16* [[ARRAYIDX_2_5]], align 2
-; CHECK-NEXT: [[CONV_2_5:%.*]] = zext i16 [[TMP42]] to i32
-; CHECK-NEXT: [[ADD_2_5:%.*]] = add nuw nsw i32 [[ADD_1_5]], [[CONV_2_5]]
-; CHECK-NEXT: [[MUL_2_5:%.*]] = mul nuw nsw i32 [[CONV_2_5]], [[CONV_2_5]]
-; CHECK-NEXT: [[ADD11_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD11_1_5]]
-; CHECK-NEXT: [[ARRAYIDX_3_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 3
-; CHECK-NEXT: [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX_3_5]], align 2
-; CHECK-NEXT: [[CONV_3_5:%.*]] = zext i16 [[TMP43]] to i32
-; CHECK-NEXT: [[ADD_3_5:%.*]] = add nuw nsw i32 [[ADD_2_5]], [[CONV_3_5]]
-; CHECK-NEXT: [[MUL_3_5:%.*]] = mul nuw nsw i32 [[CONV_3_5]], [[CONV_3_5]]
-; CHECK-NEXT: [[ADD11_3_5:%.*]] = add i32 [[MUL_3_5]], [[ADD11_2_5]]
-; CHECK-NEXT: [[ARRAYIDX_4_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 4
-; CHECK-NEXT: [[TMP44:%.*]] = load i16, i16* [[ARRAYIDX_4_5]], align 2
-; CHECK-NEXT: [[CONV_4_5:%.*]] = zext i16 [[TMP44]] to i32
-; CHECK-NEXT: [[ADD_4_5:%.*]] = add nuw nsw i32 [[ADD_3_5]], [[CONV_4_5]]
-; CHECK-NEXT: [[MUL_4_5:%.*]] = mul nuw nsw i32 [[CONV_4_5]], [[CONV_4_5]]
-; CHECK-NEXT: [[ADD11_4_5:%.*]] = add i32 [[MUL_4_5]], [[ADD11_3_5]]
-; CHECK-NEXT: [[ARRAYIDX_5_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 5
-; CHECK-NEXT: [[TMP45:%.*]] = load i16, i16* [[ARRAYIDX_5_5]], align 2
-; CHECK-NEXT: [[CONV_5_5:%.*]] = zext i16 [[TMP45]] to i32
-; CHECK-NEXT: [[ADD_5_5:%.*]] = add nuw nsw i32 [[ADD_4_5]], [[CONV_5_5]]
-; CHECK-NEXT: [[MUL_5_5:%.*]] = mul nuw nsw i32 [[CONV_5_5]], [[CONV_5_5]]
-; CHECK-NEXT: [[ADD11_5_5:%.*]] = add i32 [[MUL_5_5]], [[ADD11_4_5]]
-; CHECK-NEXT: [[ARRAYIDX_6_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 6
-; CHECK-NEXT: [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX_6_5]], align 2
-; CHECK-NEXT: [[CONV_6_5:%.*]] = zext i16 [[TMP46]] to i32
-; CHECK-NEXT: [[ADD_6_5:%.*]] = add nuw nsw i32 [[ADD_5_5]], [[CONV_6_5]]
-; CHECK-NEXT: [[MUL_6_5:%.*]] = mul nuw nsw i32 [[CONV_6_5]], [[CONV_6_5]]
-; CHECK-NEXT: [[ADD11_6_5:%.*]] = add i32 [[MUL_6_5]], [[ADD11_5_5]]
-; CHECK-NEXT: [[ARRAYIDX_7_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 7
-; CHECK-NEXT: [[TMP47:%.*]] = load i16, i16* [[ARRAYIDX_7_5]], align 2
-; CHECK-NEXT: [[CONV_7_5:%.*]] = zext i16 [[TMP47]] to i32
-; CHECK-NEXT: [[ADD_7_5:%.*]] = add nuw nsw i32 [[ADD_6_5]], [[CONV_7_5]]
-; CHECK-NEXT: [[MUL_7_5:%.*]] = mul nuw nsw i32 [[CONV_7_5]], [[CONV_7_5]]
-; CHECK-NEXT: [[ADD11_7_5:%.*]] = add i32 [[MUL_7_5]], [[ADD11_6_5]]
 ; CHECK-NEXT: [[ADD_PTR_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP48:%.*]] = load i16, i16* [[ADD_PTR_5]], align 2
-; CHECK-NEXT: [[CONV_660:%.*]] = zext i16 [[TMP48]] to i32
-; CHECK-NEXT: [[ADD_661:%.*]] = add nuw nsw i32 [[ADD_7_5]], [[CONV_660]]
-; CHECK-NEXT: [[MUL_662:%.*]] = mul nuw nsw i32 [[CONV_660]], [[CONV_660]]
-; CHECK-NEXT: [[ADD11_663:%.*]] = add i32 [[MUL_662]], [[ADD11_7_5]]
-; CHECK-NEXT: [[ARRAYIDX_1_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 1
-; CHECK-NEXT: [[TMP49:%.*]] = load i16, i16* [[ARRAYIDX_1_6]], align 2
-; CHECK-NEXT: [[CONV_1_6:%.*]] = zext i16 [[TMP49]] to i32
-; CHECK-NEXT: [[ADD_1_6:%.*]] = add nuw nsw i32 [[ADD_661]], [[CONV_1_6]]
-; CHECK-NEXT: [[MUL_1_6:%.*]] = mul nuw nsw i32 [[CONV_1_6]], [[CONV_1_6]]
-; CHECK-NEXT: [[ADD11_1_6:%.*]] = add i32 [[MUL_1_6]], [[ADD11_663]]
-; CHECK-NEXT: [[ARRAYIDX_2_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 2
-; CHECK-NEXT: [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_2_6]], align 2
-; CHECK-NEXT: [[CONV_2_6:%.*]] = zext i16 [[TMP50]] to i32
-; CHECK-NEXT: [[ADD_2_6:%.*]] = add nuw nsw i32 [[ADD_1_6]], [[CONV_2_6]]
-; CHECK-NEXT: [[MUL_2_6:%.*]] = mul nuw nsw i32 [[CONV_2_6]], [[CONV_2_6]]
-; CHECK-NEXT: [[ADD11_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD11_1_6]]
-; CHECK-NEXT: [[ARRAYIDX_3_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 3
-; CHECK-NEXT: [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX_3_6]], align 2
-; CHECK-NEXT: [[CONV_3_6:%.*]] = zext i16 [[TMP51]] to i32
-; CHECK-NEXT: [[ADD_3_6:%.*]] = add nuw nsw i32 [[ADD_2_6]], [[CONV_3_6]]
-; CHECK-NEXT: [[MUL_3_6:%.*]] = mul nuw nsw i32 [[CONV_3_6]], [[CONV_3_6]]
-; CHECK-NEXT: [[ADD11_3_6:%.*]] = add i32 [[MUL_3_6]], [[ADD11_2_6]]
-; CHECK-NEXT: [[ARRAYIDX_4_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 4
-; CHECK-NEXT: [[TMP52:%.*]] = load i16, i16* [[ARRAYIDX_4_6]], align 2
-; CHECK-NEXT: [[CONV_4_6:%.*]] = zext i16 [[TMP52]] to i32
-; CHECK-NEXT: [[ADD_4_6:%.*]] = add nuw nsw i32 [[ADD_3_6]], [[CONV_4_6]]
-; CHECK-NEXT: [[MUL_4_6:%.*]] = mul nuw nsw i32 [[CONV_4_6]], [[CONV_4_6]]
-; CHECK-NEXT: [[ADD11_4_6:%.*]] = add i32 [[MUL_4_6]], [[ADD11_3_6]]
-; CHECK-NEXT: [[ARRAYIDX_5_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 5
-; CHECK-NEXT: [[TMP53:%.*]] = load i16, i16* [[ARRAYIDX_5_6]], align 2
-; CHECK-NEXT: [[CONV_5_6:%.*]] = zext i16 [[TMP53]] to i32
-; CHECK-NEXT: [[ADD_5_6:%.*]] = add nuw nsw i32 [[ADD_4_6]], [[CONV_5_6]]
-; CHECK-NEXT: [[MUL_5_6:%.*]] = mul nuw nsw i32 [[CONV_5_6]], [[CONV_5_6]]
-; CHECK-NEXT: [[ADD11_5_6:%.*]] = add i32 [[MUL_5_6]], [[ADD11_4_6]]
-; CHECK-NEXT: [[ARRAYIDX_6_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 6
-; CHECK-NEXT: [[TMP54:%.*]] = load i16, i16* [[ARRAYIDX_6_6]], align 2
-; CHECK-NEXT: [[CONV_6_6:%.*]] = zext i16 [[TMP54]] to i32
-; CHECK-NEXT: [[ADD_6_6:%.*]] = add nuw nsw i32 [[ADD_5_6]], [[CONV_6_6]]
-; CHECK-NEXT: [[MUL_6_6:%.*]] = mul nuw nsw i32 [[CONV_6_6]], [[CONV_6_6]]
-; CHECK-NEXT: [[ADD11_6_6:%.*]] = add i32 [[MUL_6_6]], [[ADD11_5_6]]
-; CHECK-NEXT: [[ARRAYIDX_7_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 7
-; CHECK-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX_7_6]], align 2
-; CHECK-NEXT: [[CONV_7_6:%.*]] = zext i16 [[TMP55]] to i32
-; CHECK-NEXT: [[ADD_7_6:%.*]] = add nuw nsw i32 [[ADD_6_6]], [[CONV_7_6]]
-; CHECK-NEXT: [[MUL_7_6:%.*]] = mul nuw nsw i32 [[CONV_7_6]], [[CONV_7_6]]
-; CHECK-NEXT: [[ADD11_7_6:%.*]] = add i32 [[MUL_7_6]], [[ADD11_6_6]]
 ; CHECK-NEXT: [[ADD_PTR_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 [[IDX_EXT]]
-; CHECK-NEXT: [[TMP56:%.*]] = load i16, i16* [[ADD_PTR_6]], align 2
-; CHECK-NEXT: [[CONV_764:%.*]] = zext i16 [[TMP56]] to i32
-; CHECK-NEXT: [[ADD_765:%.*]] = add nuw nsw i32 [[ADD_7_6]], [[CONV_764]]
-; CHECK-NEXT: [[MUL_766:%.*]] = mul nuw nsw i32 [[CONV_764]], [[CONV_764]]
-; CHECK-NEXT: [[ADD11_767:%.*]] = add i32 [[MUL_766]], [[ADD11_7_6]]
-; CHECK-NEXT: [[ARRAYIDX_1_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 1
-; CHECK-NEXT: [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX_1_7]], align 2
-; CHECK-NEXT: [[CONV_1_7:%.*]] = zext i16 [[TMP57]] to i32
-; CHECK-NEXT: [[ADD_1_7:%.*]] = add nuw nsw i32 [[ADD_765]], [[CONV_1_7]]
-; CHECK-NEXT: [[MUL_1_7:%.*]] = mul nuw nsw i32 [[CONV_1_7]], [[CONV_1_7]]
-; CHECK-NEXT: [[ADD11_1_7:%.*]] = add i32 [[MUL_1_7]], [[ADD11_767]]
-; CHECK-NEXT: [[ARRAYIDX_2_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 2
-; CHECK-NEXT: [[TMP58:%.*]] = load i16, i16* [[ARRAYIDX_2_7]], align 2
-; CHECK-NEXT: [[CONV_2_7:%.*]] = zext i16 [[TMP58]] to i32
-; CHECK-NEXT: [[ADD_2_7:%.*]] = add nuw nsw i32 [[ADD_1_7]], [[CONV_2_7]]
-; CHECK-NEXT: [[MUL_2_7:%.*]] = mul nuw nsw i32 [[CONV_2_7]], [[CONV_2_7]]
-; CHECK-NEXT: [[ADD11_2_7:%.*]] = add i32 [[MUL_2_7]], [[ADD11_1_7]]
-; CHECK-NEXT: [[ARRAYIDX_3_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 3
-; CHECK-NEXT: [[TMP59:%.*]] = load i16, i16* [[ARRAYIDX_3_7]], align 2
-; CHECK-NEXT: [[CONV_3_7:%.*]] = zext i16 [[TMP59]] to i32
-; CHECK-NEXT: [[ADD_3_7:%.*]] = add nuw nsw i32 [[ADD_2_7]], [[CONV_3_7]]
-; CHECK-NEXT: [[MUL_3_7:%.*]] = mul nuw nsw i32 [[CONV_3_7]], [[CONV_3_7]]
-; CHECK-NEXT: [[ADD11_3_7:%.*]] = add i32 [[MUL_3_7]], [[ADD11_2_7]]
-; CHECK-NEXT: [[ARRAYIDX_4_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 4
-; CHECK-NEXT: [[TMP60:%.*]] = load i16, i16* [[ARRAYIDX_4_7]], align 2
-; CHECK-NEXT: [[CONV_4_7:%.*]] = zext i16 [[TMP60]] to i32
-; CHECK-NEXT: [[ADD_4_7:%.*]] = add nuw nsw i32 [[ADD_3_7]], [[CONV_4_7]]
-; CHECK-NEXT: [[MUL_4_7:%.*]] = mul nuw nsw i32 [[CONV_4_7]], [[CONV_4_7]]
-; CHECK-NEXT: [[ADD11_4_7:%.*]] = add i32 [[MUL_4_7]], [[ADD11_3_7]]
-; CHECK-NEXT: [[ARRAYIDX_5_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 5
-; CHECK-NEXT: [[TMP61:%.*]] = load i16, i16* [[ARRAYIDX_5_7]], align 2
-; CHECK-NEXT: [[CONV_5_7:%.*]] = zext i16 [[TMP61]] to i32
-; CHECK-NEXT: [[ADD_5_7:%.*]] = add nuw nsw i32 [[ADD_4_7]], [[CONV_5_7]]
-; CHECK-NEXT: [[MUL_5_7:%.*]] = mul nuw nsw i32 [[CONV_5_7]], [[CONV_5_7]]
-; CHECK-NEXT: [[ADD11_5_7:%.*]] = add i32 [[MUL_5_7]], [[ADD11_4_7]]
-; CHECK-NEXT: [[ARRAYIDX_6_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 6
-; CHECK-NEXT: [[TMP62:%.*]] = load i16, i16* [[ARRAYIDX_6_7]], align 2
-; CHECK-NEXT: [[CONV_6_7:%.*]] = zext i16 [[TMP62]] to i32
-; CHECK-NEXT: [[ADD_6_7:%.*]] = add nuw nsw i32 [[ADD_5_7]], [[CONV_6_7]]
-; CHECK-NEXT: [[MUL_6_7:%.*]] = mul nuw nsw i32 [[CONV_6_7]], [[CONV_6_7]]
-; CHECK-NEXT: [[ADD11_6_7:%.*]] = add i32 [[MUL_6_7]], [[ADD11_5_7]]
-; CHECK-NEXT: [[ARRAYIDX_7_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 7
-; CHECK-NEXT: [[TMP63:%.*]] = load i16, i16* [[ARRAYIDX_7_7]], align 2
-; CHECK-NEXT: [[CONV_7_7:%.*]] = zext i16 [[TMP63]] to i32
-; CHECK-NEXT: [[ADD_7_7:%.*]] = add nuw nsw i32 [[ADD_6_7]], [[CONV_7_7]]
-; CHECK-NEXT: [[MUL_7_7:%.*]] = mul nuw nsw i32 [[CONV_7_7]], [[CONV_7_7]]
-; CHECK-NEXT: [[ADD11_7_7:%.*]] = add i32 [[MUL_7_7]], [[ADD11_6_7]]
-; CHECK-NEXT: [[CONV15:%.*]] = zext i32 [[ADD_7_7]] to i64
-; CHECK-NEXT: [[CONV16:%.*]] = zext i32 [[ADD11_7_7]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[P]] to <8 x i16>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[ADD_PTR]] to <8 x i16>*
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[TMP2]], align 2
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[ADD_PTR_1]] to <8 x i16>*
+; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[ADD_PTR_2]] to <8 x i16>*
+; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[TMP6]], align 2
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[ADD_PTR_3]] to <8 x i16>*
+; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 2
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[ADD_PTR_4]] to <8 x i16>*
+; CHECK-NEXT: [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[TMP10]], align 2
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16* [[ADD_PTR_5]] to <8 x i16>*
+; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i16>, <8 x i16>* [[TMP12]], align 2
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i16* [[ADD_PTR_6]] to <8 x i16>*
+; CHECK-NEXT: [[TMP15:%.*]] = load <8 x i16>, <8 x i16>* [[TMP14]], align 2
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <64 x i16> [[TMP16]], <64 x i16> [[TMP17]], <64 x i32>
+; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <64 x i16> [[TMP18]], <64 x i16> [[TMP19]], <64 x i32>
+; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <64 x i16> [[TMP20]], <64 x i16> [[TMP21]], <64 x i32>
+; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <8 x i16> [[TMP9]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <64 x i16> [[TMP22]], <64 x i16> [[TMP23]], <64 x i32>
+; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x i16> [[TMP11]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <64 x i16> [[TMP24]], <64 x i16> [[TMP25]], <64 x i32>
+; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i16> [[TMP13]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <64 x i16> [[TMP26]], <64 x i16> [[TMP27]], <64 x i32>
+; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <8 x i16> [[TMP15]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <64 x i16> [[TMP28]], <64 x i16> [[TMP29]], <64 x i32>
+; CHECK-NEXT: [[TMP31:%.*]] = zext <64 x i16> [[TMP30]] to <64 x i32>
+; CHECK-NEXT: [[TMP32:%.*]] = mul nuw nsw <64 x i32> [[TMP31]], [[TMP31]]
+; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> [[TMP31]])
+; CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> [[TMP32]])
+; CHECK-NEXT: [[CONV15:%.*]] = zext i32 [[TMP33]] to i64
+; CHECK-NEXT: [[CONV16:%.*]] = zext i32 [[TMP34]] to i64
 ; CHECK-NEXT: [[SHL:%.*]] = shl nuw i64 [[CONV16]], 32
 ; CHECK-NEXT: [[ADD17:%.*]] = or i64 [[SHL]], [[CONV15]]
 ; CHECK-NEXT: ret i64 [[ADD17]]
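
A note on the profitability condition added in getTreeCost above: values are
cached only when the tree missed the threshold as costed, but would have passed
it without the scalar-extraction cost. A minimal standalone sketch of that
predicate, with InstructionCost reduced to plain int and Threshold standing in
for SLPCostThreshold (the function name here is illustrative, not LLVM API):

    // A tree is worth caching when vectorization was rejected overall
    // (Cost >= -Threshold) but the extraction cost alone pushed it over
    // the limit (Cost - ExtractCost < -Threshold).
    bool worthCachingValues(int Cost, int ExtractCost, int Threshold = 0) {
      return Cost >= -Threshold && Cost - ExtractCost < -Threshold;
    }

For example, with the default threshold of 0, Cost = 4 and ExtractCost = 10
give 4 >= 0 (rejected) but 4 - 10 = -6 < 0, so cacheVectorizableValues() runs,
and the reduction vectorizer retries the root via foundVectorizableValues().
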