diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -907,6 +907,10 @@
   void buildExternalUses(
       const ExtraValueToDebugLocsMap &ExternallyUsedValues = {});
 
+  void addHorizontalReduction(Value *V) { HorizontalReductions.insert(V); }
+
+  void clearHorizontalReductions() { HorizontalReductions.clear(); }
+
   /// Clear the internal data structures that are created by 'buildTree'.
   void deleteTree() {
     VectorizableTree.clear();
@@ -2618,8 +2622,8 @@
   /// This POD struct describes one external user in the vectorized tree.
   struct ExternalUser {
-    ExternalUser(Value *S, llvm::User *U, int L)
-        : Scalar(S), User(U), Lane(L) {}
+    ExternalUser(Value *S, llvm::User *U, int L, bool MR = false)
+        : Scalar(S), User(U), Lane(L), HasMutualReduction(MR) {}
 
     // Which scalar in our function.
     Value *Scalar;
@@ -2629,6 +2633,8 @@
 
     // Which lane does the scalar belong to.
     int Lane;
+
+    // True if the user is itself an op of another ("mutual") reduction.
+    bool HasMutualReduction;
   };
 
   using UserList = SmallVector<ExternalUser, 16>;
@@ -2682,6 +2688,8 @@
   /// after vectorization.
   UserList ExternalUses;
 
+  // Ops and values of reductions matched so far; see addHorizontalReduction().
+  DenseSet<Value *> HorizontalReductions;
+
   /// Values used only by @llvm.assume calls.
   SmallPtrSet<const Value *, 32> EphValues;
@@ -4280,7 +4288,8 @@
         LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
                           << Lane << " from " << *Scalar << ".\n");
-        ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane));
+        bool R = HorizontalReductions.find(U) != HorizontalReductions.end();
+        ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane, R));
       }
     }
   }
@@ -7083,6 +7092,10 @@
   SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
   SmallVector<APInt> DemandedElts;
   for (ExternalUser &EU : ExternalUses) {
+    // This user is part of a mutual reduction and might get vectorized
+    // itself, so do not charge an extract cost for it.
+    if (EU.HasMutualReduction)
+      continue;
+
     // We only add extract cost once for the same scalar.
     if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
         !ExtractCostCalculated.insert(EU.Scalar).second)
@@ -10838,7 +10851,7 @@
   /// Try to find a reduction tree.
   bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst,
                                  ScalarEvolution &SE, const DataLayout &DL,
-                                 const TargetLibraryInfo &TLI) {
+                                 const TargetLibraryInfo &TLI, BoUpSLP &V) {
     assert((!Phi || is_contained(Phi->operands(), Inst)) &&
            "Phi needs to use the binary operator");
     assert((isa<BinaryOperator>(Inst) || isa<SelectInst>(Inst) ||
@@ -11018,6 +11031,15 @@
     stable_sort(ReducedVals, [](ArrayRef<Value *> P1, ArrayRef<Value *> P2) {
       return P1.size() > P2.size();
     });
+    // Cache the reduction ops and reduced values in the vectorizer so that
+    // buildExternalUses() can recognize users that belong to this reduction.
+    for (ArrayRef<Value *> RdxOps : ReductionOps)
+      for (Value *RdxOp : RdxOps)
+        if (RdxOp)
+          V.addHorizontalReduction(RdxOp);
+    for (ArrayRef<Value *> RdxVals : ReducedVals)
+      for (Value *RdxVal : RdxVals)
+        if (RdxVal)
+          V.addHorizontalReduction(RdxVal);
     return true;
   }
@@ -11035,7 +11057,11 @@
     if (NumReducedVals < ReductionLimit)
       return nullptr;
 
-    IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
+    // The root of this reduction may have already been reduced.
+    auto *RootInst = dyn_cast<Instruction>(ReductionRoot);
+    if (!RootInst)
+      return nullptr;
+    IRBuilder<> Builder(RootInst);
 
     // Track the reduced values in case they are replaced by extractelement
     // because of the vectorization.
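Taken together, the hunks above set up a simple contract: when a horizontal reduction is matched, its reduction ops and reduced values are cached in the BoUpSLP instance, buildExternalUses() flags external users found in that cache, and the cost model stops charging an extract cost for them, since a sibling ("mutual") reduction is expected to vectorize those users anyway. The hunk that follows then wires matching and reducing together. A minimal stand-alone sketch of that contract, assuming nothing from LLVM (Value, ExternalUse, MiniCostModel, and the unit cost are illustrative stand-ins, not the patch's types):

    #include <unordered_set>
    #include <vector>

    struct Value {};                // stand-in for llvm::Value
    struct ExternalUse {
      Value *Scalar;                // scalar produced inside the vector tree
      Value *User;                  // out-of-tree user of that scalar
      bool HasMutualReduction;      // user is an op of a cached reduction
    };

    // Matching a reduction caches its ops; building external uses consults
    // the cache; costing skips users the sibling reduction will vectorize.
    struct MiniCostModel {
      std::unordered_set<Value *> HorizontalReductions;

      void addHorizontalReduction(Value *V) { HorizontalReductions.insert(V); }
      void clearHorizontalReductions() { HorizontalReductions.clear(); }

      ExternalUse makeExternalUse(Value *Scalar, Value *User) {
        return {Scalar, User, HorizontalReductions.count(User) != 0};
      }

      int extractCost(const std::vector<ExternalUse> &Uses) {
        int Cost = 0;
        for (const ExternalUse &EU : Uses)
          if (!EU.HasMutualReduction)
            Cost += 1;              // illustrative unit cost per extract
        return Cost;
      }
    };

Without the HasMutualReduction skip, each scalar of one reduction that feeds the other would be costed as an extract, which is what previously made the combined tree look unprofitable.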
@@ -11749,23 +11775,39 @@
   std::queue<std::pair<Instruction *, unsigned>> Stack;
   Stack.emplace(Root, 0);
   SmallPtrSet<Value *, 8> VisitedInstrs;
+  Optional<HorizontalReduction> PendingReduction = None;
   bool Res = false;
-  auto &&TryToReduce = [this, TTI, &P, &R](Instruction *Inst, Value *&B0,
-                                           Value *&B1) -> Value * {
+  auto &&MatchReduction = [this, TTI, &P, &R](Instruction *Inst,
+      Value *&B0, Value *&B1) -> Optional<HorizontalReduction> {
     if (R.isAnalyzedReductionRoot(Inst))
-      return nullptr;
+      return None;
     bool IsBinop = matchRdxBop(Inst, B0, B1);
     bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
     if (IsBinop || IsSelect) {
       HorizontalReduction HorRdx;
-      if (HorRdx.matchAssociativeReduction(P, Inst, *SE, *DL, *TLI))
-        return HorRdx.tryToReduce(R, TTI);
+      if (HorRdx.matchAssociativeReduction(P, Inst, *SE, *DL, *TLI, R))
+        return std::move(HorRdx);
     }
-    return nullptr;
+    return None;
   };
-  while (!Stack.empty()) {
+  while (!Stack.empty() || PendingReduction) {
     Instruction *Inst;
     unsigned Level;
+
+    if (Stack.empty()) {
+      // Try any pending reduction before quitting.
+      Value *V = PendingReduction->tryToReduce(R, TTI);
+      PendingReduction = None;
+      R.clearHorizontalReductions();
+      if (V) {
+        Res = true;
+        P = nullptr;
+        if (auto *I = dyn_cast<Instruction>(V))
+          Stack.emplace(I, 0);
+      }
+      continue;
+    }
+
     std::tie(Inst, Level) = Stack.front();
     Stack.pop();
     // Do not try to analyze instruction that has already been vectorized.
@@ -11773,18 +11815,28 @@
     // iteration while stack was populated before that happened.
     if (R.isDeleted(Inst))
       continue;
-    Value *B0 = nullptr, *B1 = nullptr;
-    if (Value *V = TryToReduce(Inst, B0, B1)) {
-      Res = true;
-      // Set P to nullptr to avoid re-analysis of phi node in
-      // matchAssociativeReduction function unless this is the root node.
-      P = nullptr;
-      if (auto *I = dyn_cast<Instruction>(V)) {
-        // Try to find another reduction.
-        Stack.emplace(I, Level);
-        continue;
+
+    Value *V = nullptr, *B0 = nullptr, *B1 = nullptr;
+    Optional<HorizontalReduction> NewReduction = MatchReduction(Inst, B0, B1);
+    if (NewReduction) {
+      // Look ahead for mutual reductions before reducing this one.
+      if (!PendingReduction)
+        PendingReduction = std::move(NewReduction);
+      else {
+        V = PendingReduction->tryToReduce(R, TTI);
+        PendingReduction = std::move(NewReduction);
+        R.clearHorizontalReductions();
+        if (V) {
+          Res = true;
+          P = nullptr;
+          if (auto *I = dyn_cast<Instruction>(V)) {
+            Stack.emplace(I, Level);
+            continue;
+          }
+        }
       }
-    } else {
+    }
+    if (!V) {
       bool IsBinop = B0 && B1;
       if (P && IsBinop) {
         Inst = dyn_cast<Instruction>(B0);
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/multiple_reduction.ll
@@ -8,395 +8,55 @@
 ; for i = ...
 ;   sm += x[i];
 ;   sq += x[i] * x[i];
-; It currently doesn't SLP vectorize, but should.
 define i64 @straight(i16* nocapture noundef readonly %p, i32 noundef %st) {
 ; CHECK-LABEL: @straight(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[ST:%.*]] to i64
-; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[P:%.*]], align 2
-; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
-; CHECK-NEXT:    [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX_1]], align 2
-; CHECK-NEXT:    [[CONV_1:%.*]] = zext i16 [[TMP1]] to i32
-; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i32 [[CONV]], [[CONV_1]]
-; CHECK-NEXT:    [[MUL_1:%.*]] = mul nuw nsw i32 [[CONV_1]], [[CONV_1]]
-; CHECK-NEXT:    [[ADD11_1:%.*]] = add nuw i32 [[MUL_1]], [[MUL]]
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX_2]], align 2
-; CHECK-NEXT:    [[CONV_2:%.*]] = zext i16 [[TMP2]] to i32
-; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], [[CONV_2]]
-; CHECK-NEXT:    [[MUL_2:%.*]] = mul nuw nsw i32 [[CONV_2]], [[CONV_2]]
-; CHECK-NEXT:    [[ADD11_2:%.*]] = add i32 [[MUL_2]], [[ADD11_1]]
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 3
-; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX_3]], align 2
-; CHECK-NEXT:    [[CONV_3:%.*]] = zext i16 [[TMP3]] to i32
-; CHECK-NEXT:    [[ADD_3:%.*]] = add nuw nsw i32 [[ADD_2]], [[CONV_3]]
-; CHECK-NEXT:    [[MUL_3:%.*]] = mul nuw nsw i32 [[CONV_3]], [[CONV_3]]
-; CHECK-NEXT:    [[ADD11_3:%.*]] = add i32 [[MUL_3]], [[ADD11_2]]
-; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX_4]], align 2
-; CHECK-NEXT:    [[CONV_4:%.*]] = zext i16 [[TMP4]] to i32
-; CHECK-NEXT:    [[ADD_4:%.*]] = add nuw nsw i32 [[ADD_3]], [[CONV_4]]
-; CHECK-NEXT:    [[MUL_4:%.*]] = mul nuw nsw i32 [[CONV_4]], [[CONV_4]]
-; CHECK-NEXT:    [[ADD11_4:%.*]] = add i32 [[MUL_4]], [[ADD11_3]]
-; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 5
-; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX_5]], align 2
-; CHECK-NEXT:    [[CONV_5:%.*]] = zext i16 [[TMP5]] to i32
-; CHECK-NEXT:    [[ADD_5:%.*]] = add nuw nsw i32 [[ADD_4]], [[CONV_5]]
-; CHECK-NEXT:    [[MUL_5:%.*]] = mul nuw nsw i32 [[CONV_5]], [[CONV_5]]
-; CHECK-NEXT:    [[ADD11_5:%.*]] = add i32 [[MUL_5]], [[ADD11_4]]
-; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 6
-; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX_6]], align 2
-; CHECK-NEXT:    [[CONV_6:%.*]] = zext i16 [[TMP6]] to i32
-; CHECK-NEXT:    [[ADD_6:%.*]] = add nuw nsw i32 [[ADD_5]], [[CONV_6]]
-; CHECK-NEXT:    [[MUL_6:%.*]] = mul nuw nsw i32 [[CONV_6]], [[CONV_6]]
-; CHECK-NEXT:    [[ADD11_6:%.*]] = add i32 [[MUL_6]], [[ADD11_5]]
-; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 7
-; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX_7]], align 2
-; CHECK-NEXT:    [[CONV_7:%.*]] = zext i16 [[TMP7]] to i32
-; CHECK-NEXT:    [[ADD_7:%.*]] = add nuw nsw i32 [[ADD_6]], [[CONV_7]]
-; CHECK-NEXT:    [[MUL_7:%.*]] = mul nuw nsw i32 [[CONV_7]], [[CONV_7]]
-; CHECK-NEXT:    [[ADD11_7:%.*]] = add i32 [[MUL_7]], [[ADD11_6]]
-; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[ADD_PTR]], align 2
-; CHECK-NEXT:    [[CONV_140:%.*]] = zext i16 [[TMP8]] to i32
-; CHECK-NEXT:    [[ADD_141:%.*]] = add nuw nsw i32 [[ADD_7]], [[CONV_140]]
-; CHECK-NEXT:    [[MUL_142:%.*]] = mul nuw nsw i32 [[CONV_140]], [[CONV_140]]
-; CHECK-NEXT:    [[ADD11_143:%.*]] = add i32 [[MUL_142]], [[ADD11_7]]
-; CHECK-NEXT:    [[ARRAYIDX_1_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 1
-; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX_1_1]], align 2
-; CHECK-NEXT:    [[CONV_1_1:%.*]] = zext i16 [[TMP9]] to i32
-; CHECK-NEXT:    [[ADD_1_1:%.*]] = add nuw nsw i32 [[ADD_141]], [[CONV_1_1]]
-; CHECK-NEXT:    [[MUL_1_1:%.*]] = mul nuw nsw i32 [[CONV_1_1]], [[CONV_1_1]]
-; CHECK-NEXT:    [[ADD11_1_1:%.*]] = add i32 [[MUL_1_1]], [[ADD11_143]]
-; CHECK-NEXT:    [[ARRAYIDX_2_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 2
-; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX_2_1]], align 2
-; CHECK-NEXT:    [[CONV_2_1:%.*]] = zext i16 [[TMP10]] to i32
-; CHECK-NEXT:    [[ADD_2_1:%.*]] = add nuw nsw i32 [[ADD_1_1]], [[CONV_2_1]]
-; CHECK-NEXT:    [[MUL_2_1:%.*]] = mul nuw nsw i32 [[CONV_2_1]], [[CONV_2_1]]
-; CHECK-NEXT:    [[ADD11_2_1:%.*]] = add i32 [[MUL_2_1]], [[ADD11_1_1]]
-; CHECK-NEXT:    [[ARRAYIDX_3_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 3
-; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX_3_1]], align 2
-; CHECK-NEXT:    [[CONV_3_1:%.*]] = zext i16 [[TMP11]] to i32
-; CHECK-NEXT:    [[ADD_3_1:%.*]] = add nuw nsw i32 [[ADD_2_1]], [[CONV_3_1]]
-; CHECK-NEXT:    [[MUL_3_1:%.*]] = mul nuw nsw i32 [[CONV_3_1]], [[CONV_3_1]]
-; CHECK-NEXT:    [[ADD11_3_1:%.*]] = add i32 [[MUL_3_1]], [[ADD11_2_1]]
-; CHECK-NEXT:    [[ARRAYIDX_4_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 4
-; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX_4_1]], align 2
-; CHECK-NEXT:    [[CONV_4_1:%.*]] = zext i16 [[TMP12]] to i32
-; CHECK-NEXT:    [[ADD_4_1:%.*]] = add nuw nsw i32 [[ADD_3_1]], [[CONV_4_1]]
-; CHECK-NEXT:    [[MUL_4_1:%.*]] = mul nuw nsw i32 [[CONV_4_1]], [[CONV_4_1]]
-; CHECK-NEXT:    [[ADD11_4_1:%.*]] = add i32 [[MUL_4_1]], [[ADD11_3_1]]
-; CHECK-NEXT:    [[ARRAYIDX_5_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 5
-; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX_5_1]], align 2
-; CHECK-NEXT:    [[CONV_5_1:%.*]] = zext i16 [[TMP13]] to i32
-; CHECK-NEXT:    [[ADD_5_1:%.*]] = add nuw nsw i32 [[ADD_4_1]], [[CONV_5_1]]
-; CHECK-NEXT:    [[MUL_5_1:%.*]] = mul nuw nsw i32 [[CONV_5_1]], [[CONV_5_1]]
-; CHECK-NEXT:    [[ADD11_5_1:%.*]] = add i32 [[MUL_5_1]], [[ADD11_4_1]]
-; CHECK-NEXT:    [[ARRAYIDX_6_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 6
-; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX_6_1]], align 2
-; CHECK-NEXT:    [[CONV_6_1:%.*]] = zext i16 [[TMP14]] to i32
-; CHECK-NEXT:    [[ADD_6_1:%.*]] = add nuw nsw i32 [[ADD_5_1]], [[CONV_6_1]]
-; CHECK-NEXT:    [[MUL_6_1:%.*]] = mul nuw nsw i32 [[CONV_6_1]], [[CONV_6_1]]
-; CHECK-NEXT:    [[ADD11_6_1:%.*]] = add i32 [[MUL_6_1]], [[ADD11_5_1]]
-; CHECK-NEXT:    [[ARRAYIDX_7_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 7
-; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX_7_1]], align 2
-; CHECK-NEXT:    [[CONV_7_1:%.*]] = zext i16 [[TMP15]] to i32
-; CHECK-NEXT:    [[ADD_7_1:%.*]] = add nuw nsw i32 [[ADD_6_1]], [[CONV_7_1]]
-; CHECK-NEXT:    [[MUL_7_1:%.*]] = mul nuw nsw i32 [[CONV_7_1]], [[CONV_7_1]]
-; CHECK-NEXT:    [[ADD11_7_1:%.*]] = add i32 [[MUL_7_1]], [[ADD11_6_1]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP16:%.*]] = load i16, i16* [[ADD_PTR_1]], align 2
-; CHECK-NEXT:    [[CONV_244:%.*]] = zext i16 [[TMP16]] to i32
-; CHECK-NEXT:    [[ADD_245:%.*]] = add nuw nsw i32 [[ADD_7_1]], [[CONV_244]]
-; CHECK-NEXT:    [[MUL_246:%.*]] = mul nuw nsw i32 [[CONV_244]], [[CONV_244]]
-; CHECK-NEXT:    [[ADD11_247:%.*]] = add i32 [[MUL_246]], [[ADD11_7_1]]
-; CHECK-NEXT:    [[ARRAYIDX_1_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 1
-; CHECK-NEXT:    [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX_1_2]], align 2
-; CHECK-NEXT:    [[CONV_1_2:%.*]] = zext i16 [[TMP17]] to i32
-; CHECK-NEXT:    [[ADD_1_2:%.*]] = add nuw nsw i32 [[ADD_245]], [[CONV_1_2]]
-; CHECK-NEXT:    [[MUL_1_2:%.*]] = mul nuw nsw i32 [[CONV_1_2]], [[CONV_1_2]]
-; CHECK-NEXT:    [[ADD11_1_2:%.*]] = add i32 [[MUL_1_2]], [[ADD11_247]]
-; CHECK-NEXT:    [[ARRAYIDX_2_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 2
-; CHECK-NEXT:    [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX_2_2]], align 2
-; CHECK-NEXT:    [[CONV_2_2:%.*]] = zext i16 [[TMP18]] to i32
-; CHECK-NEXT:    [[ADD_2_2:%.*]] = add nuw nsw i32 [[ADD_1_2]], [[CONV_2_2]]
-; CHECK-NEXT:    [[MUL_2_2:%.*]] = mul nuw nsw i32 [[CONV_2_2]], [[CONV_2_2]]
-; CHECK-NEXT:    [[ADD11_2_2:%.*]] = add i32 [[MUL_2_2]], [[ADD11_1_2]]
-; CHECK-NEXT:    [[ARRAYIDX_3_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 3
-; CHECK-NEXT:    [[TMP19:%.*]] = load i16, i16* [[ARRAYIDX_3_2]], align 2
-; CHECK-NEXT:    [[CONV_3_2:%.*]] = zext i16 [[TMP19]] to i32
-; CHECK-NEXT:    [[ADD_3_2:%.*]] = add nuw nsw i32 [[ADD_2_2]], [[CONV_3_2]]
-; CHECK-NEXT:    [[MUL_3_2:%.*]] = mul nuw nsw i32 [[CONV_3_2]], [[CONV_3_2]]
-; CHECK-NEXT:    [[ADD11_3_2:%.*]] = add i32 [[MUL_3_2]], [[ADD11_2_2]]
-; CHECK-NEXT:    [[ARRAYIDX_4_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 4
-; CHECK-NEXT:    [[TMP20:%.*]] = load i16, i16* [[ARRAYIDX_4_2]], align 2
-; CHECK-NEXT:    [[CONV_4_2:%.*]] = zext i16 [[TMP20]] to i32
-; CHECK-NEXT:    [[ADD_4_2:%.*]] = add nuw nsw i32 [[ADD_3_2]], [[CONV_4_2]]
-; CHECK-NEXT:    [[MUL_4_2:%.*]] = mul nuw nsw i32 [[CONV_4_2]], [[CONV_4_2]]
-; CHECK-NEXT:    [[ADD11_4_2:%.*]] = add i32 [[MUL_4_2]], [[ADD11_3_2]]
-; CHECK-NEXT:    [[ARRAYIDX_5_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 5
-; CHECK-NEXT:    [[TMP21:%.*]] = load i16, i16* [[ARRAYIDX_5_2]], align 2
-; CHECK-NEXT:    [[CONV_5_2:%.*]] = zext i16 [[TMP21]] to i32
-; CHECK-NEXT:    [[ADD_5_2:%.*]] = add nuw nsw i32 [[ADD_4_2]], [[CONV_5_2]]
-; CHECK-NEXT:    [[MUL_5_2:%.*]] = mul nuw nsw i32 [[CONV_5_2]], [[CONV_5_2]]
-; CHECK-NEXT:    [[ADD11_5_2:%.*]] = add i32 [[MUL_5_2]], [[ADD11_4_2]]
-; CHECK-NEXT:    [[ARRAYIDX_6_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 6
-; CHECK-NEXT:    [[TMP22:%.*]] = load i16, i16* [[ARRAYIDX_6_2]], align 2
-; CHECK-NEXT:    [[CONV_6_2:%.*]] = zext i16 [[TMP22]] to i32
-; CHECK-NEXT:    [[ADD_6_2:%.*]] = add nuw nsw i32 [[ADD_5_2]], [[CONV_6_2]]
-; CHECK-NEXT:    [[MUL_6_2:%.*]] = mul nuw nsw i32 [[CONV_6_2]], [[CONV_6_2]]
-; CHECK-NEXT:    [[ADD11_6_2:%.*]] = add i32 [[MUL_6_2]], [[ADD11_5_2]]
-; CHECK-NEXT:    [[ARRAYIDX_7_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 7
-; CHECK-NEXT:    [[TMP23:%.*]] = load i16, i16* [[ARRAYIDX_7_2]], align 2
-; CHECK-NEXT:    [[CONV_7_2:%.*]] = zext i16 [[TMP23]] to i32
-; CHECK-NEXT:    [[ADD_7_2:%.*]] = add nuw nsw i32 [[ADD_6_2]], [[CONV_7_2]]
-; CHECK-NEXT:    [[MUL_7_2:%.*]] = mul nuw nsw i32 [[CONV_7_2]], [[CONV_7_2]]
-; CHECK-NEXT:    [[ADD11_7_2:%.*]] = add i32 [[MUL_7_2]], [[ADD11_6_2]]
 ; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_1]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP24:%.*]] = load i16, i16* [[ADD_PTR_2]], align 2
-; CHECK-NEXT:    [[CONV_348:%.*]] = zext i16 [[TMP24]] to i32
-; CHECK-NEXT:    [[ADD_349:%.*]] = add nuw nsw i32 [[ADD_7_2]], [[CONV_348]]
-; CHECK-NEXT:    [[MUL_350:%.*]] = mul nuw nsw i32 [[CONV_348]], [[CONV_348]]
-; CHECK-NEXT:    [[ADD11_351:%.*]] = add i32 [[MUL_350]], [[ADD11_7_2]]
-; CHECK-NEXT:    [[ARRAYIDX_1_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 1
-; CHECK-NEXT:    [[TMP25:%.*]] = load i16, i16* [[ARRAYIDX_1_3]], align 2
-; CHECK-NEXT:    [[CONV_1_3:%.*]] = zext i16 [[TMP25]] to i32
-; CHECK-NEXT:    [[ADD_1_3:%.*]] = add nuw nsw i32 [[ADD_349]], [[CONV_1_3]]
-; CHECK-NEXT:    [[MUL_1_3:%.*]] = mul nuw nsw i32 [[CONV_1_3]], [[CONV_1_3]]
-; CHECK-NEXT:    [[ADD11_1_3:%.*]] = add i32 [[MUL_1_3]], [[ADD11_351]]
-; CHECK-NEXT:    [[ARRAYIDX_2_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 2
-; CHECK-NEXT:    [[TMP26:%.*]] = load i16, i16* [[ARRAYIDX_2_3]], align 2
-; CHECK-NEXT:    [[CONV_2_3:%.*]] = zext i16 [[TMP26]] to i32
-; CHECK-NEXT:    [[ADD_2_3:%.*]] = add nuw nsw i32 [[ADD_1_3]], [[CONV_2_3]]
-; CHECK-NEXT:    [[MUL_2_3:%.*]] = mul nuw nsw i32 [[CONV_2_3]], [[CONV_2_3]]
-; CHECK-NEXT:    [[ADD11_2_3:%.*]] = add i32 [[MUL_2_3]], [[ADD11_1_3]]
-; CHECK-NEXT:    [[ARRAYIDX_3_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 3
-; CHECK-NEXT:    [[TMP27:%.*]] = load i16, i16* [[ARRAYIDX_3_3]], align 2
-; CHECK-NEXT:    [[CONV_3_3:%.*]] = zext i16 [[TMP27]] to i32
-; CHECK-NEXT:    [[ADD_3_3:%.*]] = add nuw nsw i32 [[ADD_2_3]], [[CONV_3_3]]
-; CHECK-NEXT:    [[MUL_3_3:%.*]] = mul nuw nsw i32 [[CONV_3_3]], [[CONV_3_3]]
-; CHECK-NEXT:    [[ADD11_3_3:%.*]] = add i32 [[MUL_3_3]], [[ADD11_2_3]]
-; CHECK-NEXT:    [[ARRAYIDX_4_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 4
-; CHECK-NEXT:    [[TMP28:%.*]] = load i16, i16* [[ARRAYIDX_4_3]], align 2
-; CHECK-NEXT:    [[CONV_4_3:%.*]] = zext i16 [[TMP28]] to i32
-; CHECK-NEXT:    [[ADD_4_3:%.*]] = add nuw nsw i32 [[ADD_3_3]], [[CONV_4_3]]
-; CHECK-NEXT:    [[MUL_4_3:%.*]] = mul nuw nsw i32 [[CONV_4_3]], [[CONV_4_3]]
-; CHECK-NEXT:    [[ADD11_4_3:%.*]] = add i32 [[MUL_4_3]], [[ADD11_3_3]]
-; CHECK-NEXT:    [[ARRAYIDX_5_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 5
-; CHECK-NEXT:    [[TMP29:%.*]] = load i16, i16* [[ARRAYIDX_5_3]], align 2
-; CHECK-NEXT:    [[CONV_5_3:%.*]] = zext i16 [[TMP29]] to i32
-; CHECK-NEXT:    [[ADD_5_3:%.*]] = add nuw nsw i32 [[ADD_4_3]], [[CONV_5_3]]
-; CHECK-NEXT:    [[MUL_5_3:%.*]] = mul nuw nsw i32 [[CONV_5_3]], [[CONV_5_3]]
-; CHECK-NEXT:    [[ADD11_5_3:%.*]] = add i32 [[MUL_5_3]], [[ADD11_4_3]]
-; CHECK-NEXT:    [[ARRAYIDX_6_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 6
-; CHECK-NEXT:    [[TMP30:%.*]] = load i16, i16* [[ARRAYIDX_6_3]], align 2
-; CHECK-NEXT:    [[CONV_6_3:%.*]] = zext i16 [[TMP30]] to i32
-; CHECK-NEXT:    [[ADD_6_3:%.*]] = add nuw nsw i32 [[ADD_5_3]], [[CONV_6_3]]
-; CHECK-NEXT:    [[MUL_6_3:%.*]] = mul nuw nsw i32 [[CONV_6_3]], [[CONV_6_3]]
-; CHECK-NEXT:    [[ADD11_6_3:%.*]] = add i32 [[MUL_6_3]], [[ADD11_5_3]]
-; CHECK-NEXT:    [[ARRAYIDX_7_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 7
-; CHECK-NEXT:    [[TMP31:%.*]] = load i16, i16* [[ARRAYIDX_7_3]], align 2
-; CHECK-NEXT:    [[CONV_7_3:%.*]] = zext i16 [[TMP31]] to i32
-; CHECK-NEXT:    [[ADD_7_3:%.*]] = add nuw nsw i32 [[ADD_6_3]], [[CONV_7_3]]
-; CHECK-NEXT:    [[MUL_7_3:%.*]] = mul nuw nsw i32 [[CONV_7_3]], [[CONV_7_3]]
-; CHECK-NEXT:    [[ADD11_7_3:%.*]] = add i32 [[MUL_7_3]], [[ADD11_6_3]]
 ; CHECK-NEXT:    [[ADD_PTR_3:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_2]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP32:%.*]] = load i16, i16* [[ADD_PTR_3]], align 2
-; CHECK-NEXT:    [[CONV_452:%.*]] = zext i16 [[TMP32]] to i32
-; CHECK-NEXT:    [[ADD_453:%.*]] = add nuw nsw i32 [[ADD_7_3]], [[CONV_452]]
-; CHECK-NEXT:    [[MUL_454:%.*]] = mul nuw nsw i32 [[CONV_452]], [[CONV_452]]
-; CHECK-NEXT:    [[ADD11_455:%.*]] = add i32 [[MUL_454]], [[ADD11_7_3]]
-; CHECK-NEXT:    [[ARRAYIDX_1_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 1
-; CHECK-NEXT:    [[TMP33:%.*]] = load i16, i16* [[ARRAYIDX_1_4]], align 2
-; CHECK-NEXT:    [[CONV_1_4:%.*]] = zext i16 [[TMP33]] to i32
-; CHECK-NEXT:    [[ADD_1_4:%.*]] = add nuw nsw i32 [[ADD_453]], [[CONV_1_4]]
-; CHECK-NEXT:    [[MUL_1_4:%.*]] = mul nuw nsw i32 [[CONV_1_4]], [[CONV_1_4]]
-; CHECK-NEXT:    [[ADD11_1_4:%.*]] = add i32 [[MUL_1_4]], [[ADD11_455]]
-; CHECK-NEXT:    [[ARRAYIDX_2_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 2
-; CHECK-NEXT:    [[TMP34:%.*]] = load i16, i16* [[ARRAYIDX_2_4]], align 2
-; CHECK-NEXT:    [[CONV_2_4:%.*]] = zext i16 [[TMP34]] to i32
-; CHECK-NEXT:    [[ADD_2_4:%.*]] = add nuw nsw i32 [[ADD_1_4]], [[CONV_2_4]]
-; CHECK-NEXT:    [[MUL_2_4:%.*]] = mul nuw nsw i32 [[CONV_2_4]], [[CONV_2_4]]
-; CHECK-NEXT:    [[ADD11_2_4:%.*]] = add i32 [[MUL_2_4]], [[ADD11_1_4]]
-; CHECK-NEXT:    [[ARRAYIDX_3_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 3
-; CHECK-NEXT:    [[TMP35:%.*]] = load i16, i16* [[ARRAYIDX_3_4]], align 2
-; CHECK-NEXT:    [[CONV_3_4:%.*]] = zext i16 [[TMP35]] to i32
-; CHECK-NEXT:    [[ADD_3_4:%.*]] = add nuw nsw i32 [[ADD_2_4]], [[CONV_3_4]]
-; CHECK-NEXT:    [[MUL_3_4:%.*]] = mul nuw nsw i32 [[CONV_3_4]], [[CONV_3_4]]
-; CHECK-NEXT:    [[ADD11_3_4:%.*]] = add i32 [[MUL_3_4]], [[ADD11_2_4]]
-; CHECK-NEXT:    [[ARRAYIDX_4_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 4
-; CHECK-NEXT:    [[TMP36:%.*]] = load i16, i16* [[ARRAYIDX_4_4]], align 2
-; CHECK-NEXT:    [[CONV_4_4:%.*]] = zext i16 [[TMP36]] to i32
-; CHECK-NEXT:    [[ADD_4_4:%.*]] = add nuw nsw i32 [[ADD_3_4]], [[CONV_4_4]]
-; CHECK-NEXT:    [[MUL_4_4:%.*]] = mul nuw nsw i32 [[CONV_4_4]], [[CONV_4_4]]
-; CHECK-NEXT:    [[ADD11_4_4:%.*]] = add i32 [[MUL_4_4]], [[ADD11_3_4]]
-; CHECK-NEXT:    [[ARRAYIDX_5_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 5
-; CHECK-NEXT:    [[TMP37:%.*]] = load i16, i16* [[ARRAYIDX_5_4]], align 2
-; CHECK-NEXT:    [[CONV_5_4:%.*]] = zext i16 [[TMP37]] to i32
-; CHECK-NEXT:    [[ADD_5_4:%.*]] = add nuw nsw i32 [[ADD_4_4]], [[CONV_5_4]]
-; CHECK-NEXT:    [[MUL_5_4:%.*]] = mul nuw nsw i32 [[CONV_5_4]], [[CONV_5_4]]
-; CHECK-NEXT:    [[ADD11_5_4:%.*]] = add i32 [[MUL_5_4]], [[ADD11_4_4]]
-; CHECK-NEXT:    [[ARRAYIDX_6_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 6
-; CHECK-NEXT:    [[TMP38:%.*]] = load i16, i16* [[ARRAYIDX_6_4]], align 2
-; CHECK-NEXT:    [[CONV_6_4:%.*]] = zext i16 [[TMP38]] to i32
-; CHECK-NEXT:    [[ADD_6_4:%.*]] = add nuw nsw i32 [[ADD_5_4]], [[CONV_6_4]]
-; CHECK-NEXT:    [[MUL_6_4:%.*]] = mul nuw nsw i32 [[CONV_6_4]], [[CONV_6_4]]
-; CHECK-NEXT:    [[ADD11_6_4:%.*]] = add i32 [[MUL_6_4]], [[ADD11_5_4]]
-; CHECK-NEXT:    [[ARRAYIDX_7_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 7
-; CHECK-NEXT:    [[TMP39:%.*]] = load i16, i16* [[ARRAYIDX_7_4]], align 2
-; CHECK-NEXT:    [[CONV_7_4:%.*]] = zext i16 [[TMP39]] to i32
-; CHECK-NEXT:    [[ADD_7_4:%.*]] = add nuw nsw i32 [[ADD_6_4]], [[CONV_7_4]]
-; CHECK-NEXT:    [[MUL_7_4:%.*]] = mul nuw nsw i32 [[CONV_7_4]], [[CONV_7_4]]
-; CHECK-NEXT:    [[ADD11_7_4:%.*]] = add i32 [[MUL_7_4]], [[ADD11_6_4]]
 ; CHECK-NEXT:    [[ADD_PTR_4:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_3]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP40:%.*]] = load i16, i16* [[ADD_PTR_4]], align 2
-; CHECK-NEXT:    [[CONV_556:%.*]] = zext i16 [[TMP40]] to i32
-; CHECK-NEXT:    [[ADD_557:%.*]] = add nuw nsw i32 [[ADD_7_4]], [[CONV_556]]
-; CHECK-NEXT:    [[MUL_558:%.*]] = mul nuw nsw i32 [[CONV_556]], [[CONV_556]]
-; CHECK-NEXT:    [[ADD11_559:%.*]] = add i32 [[MUL_558]], [[ADD11_7_4]]
-; CHECK-NEXT:    [[ARRAYIDX_1_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 1
-; CHECK-NEXT:    [[TMP41:%.*]] = load i16, i16* [[ARRAYIDX_1_5]], align 2
-; CHECK-NEXT:    [[CONV_1_5:%.*]] = zext i16 [[TMP41]] to i32
-; CHECK-NEXT:    [[ADD_1_5:%.*]] = add nuw nsw i32 [[ADD_557]], [[CONV_1_5]]
-; CHECK-NEXT:    [[MUL_1_5:%.*]] = mul nuw nsw i32 [[CONV_1_5]], [[CONV_1_5]]
-; CHECK-NEXT:    [[ADD11_1_5:%.*]] = add i32 [[MUL_1_5]], [[ADD11_559]]
-; CHECK-NEXT:    [[ARRAYIDX_2_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 2
-; CHECK-NEXT:    [[TMP42:%.*]] = load i16, i16* [[ARRAYIDX_2_5]], align 2
-; CHECK-NEXT:    [[CONV_2_5:%.*]] = zext i16 [[TMP42]] to i32
-; CHECK-NEXT:    [[ADD_2_5:%.*]] = add nuw nsw i32 [[ADD_1_5]], [[CONV_2_5]]
-; CHECK-NEXT:    [[MUL_2_5:%.*]] = mul nuw nsw i32 [[CONV_2_5]], [[CONV_2_5]]
-; CHECK-NEXT:    [[ADD11_2_5:%.*]] = add i32 [[MUL_2_5]], [[ADD11_1_5]]
-; CHECK-NEXT:    [[ARRAYIDX_3_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 3
-; CHECK-NEXT:    [[TMP43:%.*]] = load i16, i16* [[ARRAYIDX_3_5]], align 2
-; CHECK-NEXT:    [[CONV_3_5:%.*]] = zext i16 [[TMP43]] to i32
-; CHECK-NEXT:    [[ADD_3_5:%.*]] = add nuw nsw i32 [[ADD_2_5]], [[CONV_3_5]]
-; CHECK-NEXT:    [[MUL_3_5:%.*]] = mul nuw nsw i32 [[CONV_3_5]], [[CONV_3_5]]
-; CHECK-NEXT:    [[ADD11_3_5:%.*]] = add i32 [[MUL_3_5]], [[ADD11_2_5]]
-; CHECK-NEXT:    [[ARRAYIDX_4_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 4
-; CHECK-NEXT:    [[TMP44:%.*]] = load i16, i16* [[ARRAYIDX_4_5]], align 2
-; CHECK-NEXT:    [[CONV_4_5:%.*]] = zext i16 [[TMP44]] to i32
-; CHECK-NEXT:    [[ADD_4_5:%.*]] = add nuw nsw i32 [[ADD_3_5]], [[CONV_4_5]]
-; CHECK-NEXT:    [[MUL_4_5:%.*]] = mul nuw nsw i32 [[CONV_4_5]], [[CONV_4_5]]
-; CHECK-NEXT:    [[ADD11_4_5:%.*]] = add i32 [[MUL_4_5]], [[ADD11_3_5]]
-; CHECK-NEXT:    [[ARRAYIDX_5_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 5
-; CHECK-NEXT:    [[TMP45:%.*]] = load i16, i16* [[ARRAYIDX_5_5]], align 2
-; CHECK-NEXT:    [[CONV_5_5:%.*]] = zext i16 [[TMP45]] to i32
-; CHECK-NEXT:    [[ADD_5_5:%.*]] = add nuw nsw i32 [[ADD_4_5]], [[CONV_5_5]]
-; CHECK-NEXT:    [[MUL_5_5:%.*]] = mul nuw nsw i32 [[CONV_5_5]], [[CONV_5_5]]
-; CHECK-NEXT:    [[ADD11_5_5:%.*]] = add i32 [[MUL_5_5]], [[ADD11_4_5]]
-; CHECK-NEXT:    [[ARRAYIDX_6_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 6
-; CHECK-NEXT:    [[TMP46:%.*]] = load i16, i16* [[ARRAYIDX_6_5]], align 2
-; CHECK-NEXT:    [[CONV_6_5:%.*]] = zext i16 [[TMP46]] to i32
-; CHECK-NEXT:    [[ADD_6_5:%.*]] = add nuw nsw i32 [[ADD_5_5]], [[CONV_6_5]]
-; CHECK-NEXT:    [[MUL_6_5:%.*]] = mul nuw nsw i32 [[CONV_6_5]], [[CONV_6_5]]
-; CHECK-NEXT:    [[ADD11_6_5:%.*]] = add i32 [[MUL_6_5]], [[ADD11_5_5]]
-; CHECK-NEXT:    [[ARRAYIDX_7_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 7
-; CHECK-NEXT:    [[TMP47:%.*]] = load i16, i16* [[ARRAYIDX_7_5]], align 2
-; CHECK-NEXT:    [[CONV_7_5:%.*]] = zext i16 [[TMP47]] to i32
-; CHECK-NEXT:    [[ADD_7_5:%.*]] = add nuw nsw i32 [[ADD_6_5]], [[CONV_7_5]]
-; CHECK-NEXT:    [[MUL_7_5:%.*]] = mul nuw nsw i32 [[CONV_7_5]], [[CONV_7_5]]
-; CHECK-NEXT:    [[ADD11_7_5:%.*]] = add i32 [[MUL_7_5]], [[ADD11_6_5]]
 ; CHECK-NEXT:    [[ADD_PTR_5:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_4]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP48:%.*]] = load i16, i16* [[ADD_PTR_5]], align 2
-; CHECK-NEXT:    [[CONV_660:%.*]] = zext i16 [[TMP48]] to i32
-; CHECK-NEXT:    [[ADD_661:%.*]] = add nuw nsw i32 [[ADD_7_5]], [[CONV_660]]
-; CHECK-NEXT:    [[MUL_662:%.*]] = mul nuw nsw i32 [[CONV_660]], [[CONV_660]]
-; CHECK-NEXT:    [[ADD11_663:%.*]] = add i32 [[MUL_662]], [[ADD11_7_5]]
-; CHECK-NEXT:    [[ARRAYIDX_1_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 1
-; CHECK-NEXT:    [[TMP49:%.*]] = load i16, i16* [[ARRAYIDX_1_6]], align 2
-; CHECK-NEXT:    [[CONV_1_6:%.*]] = zext i16 [[TMP49]] to i32
-; CHECK-NEXT:    [[ADD_1_6:%.*]] = add nuw nsw i32 [[ADD_661]], [[CONV_1_6]]
-; CHECK-NEXT:    [[MUL_1_6:%.*]] = mul nuw nsw i32 [[CONV_1_6]], [[CONV_1_6]]
-; CHECK-NEXT:    [[ADD11_1_6:%.*]] = add i32 [[MUL_1_6]], [[ADD11_663]]
-; CHECK-NEXT:    [[ARRAYIDX_2_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 2
-; CHECK-NEXT:    [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_2_6]], align 2
-; CHECK-NEXT:    [[CONV_2_6:%.*]] = zext i16 [[TMP50]] to i32
-; CHECK-NEXT:    [[ADD_2_6:%.*]] = add nuw nsw i32 [[ADD_1_6]], [[CONV_2_6]]
-; CHECK-NEXT:    [[MUL_2_6:%.*]] = mul nuw nsw i32 [[CONV_2_6]], [[CONV_2_6]]
-; CHECK-NEXT:    [[ADD11_2_6:%.*]] = add i32 [[MUL_2_6]], [[ADD11_1_6]]
-; CHECK-NEXT:    [[ARRAYIDX_3_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 3
-; CHECK-NEXT:    [[TMP51:%.*]] = load i16, i16* [[ARRAYIDX_3_6]], align 2
-; CHECK-NEXT:    [[CONV_3_6:%.*]] = zext i16 [[TMP51]] to i32
-; CHECK-NEXT:    [[ADD_3_6:%.*]] = add nuw nsw i32 [[ADD_2_6]], [[CONV_3_6]]
-; CHECK-NEXT:    [[MUL_3_6:%.*]] = mul nuw nsw i32 [[CONV_3_6]], [[CONV_3_6]]
-; CHECK-NEXT:    [[ADD11_3_6:%.*]] = add i32 [[MUL_3_6]], [[ADD11_2_6]]
-; CHECK-NEXT:    [[ARRAYIDX_4_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 4
-; CHECK-NEXT:    [[TMP52:%.*]] = load i16, i16* [[ARRAYIDX_4_6]], align 2
-; CHECK-NEXT:    [[CONV_4_6:%.*]] = zext i16 [[TMP52]] to i32
-; CHECK-NEXT:    [[ADD_4_6:%.*]] = add nuw nsw i32 [[ADD_3_6]], [[CONV_4_6]]
-; CHECK-NEXT:    [[MUL_4_6:%.*]] = mul nuw nsw i32 [[CONV_4_6]], [[CONV_4_6]]
-; CHECK-NEXT:    [[ADD11_4_6:%.*]] = add i32 [[MUL_4_6]], [[ADD11_3_6]]
-; CHECK-NEXT:    [[ARRAYIDX_5_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 5
-; CHECK-NEXT:    [[TMP53:%.*]] = load i16, i16* [[ARRAYIDX_5_6]], align 2
-; CHECK-NEXT:    [[CONV_5_6:%.*]] = zext i16 [[TMP53]] to i32
-; CHECK-NEXT:    [[ADD_5_6:%.*]] = add nuw nsw i32 [[ADD_4_6]], [[CONV_5_6]]
-; CHECK-NEXT:    [[MUL_5_6:%.*]] = mul nuw nsw i32 [[CONV_5_6]], [[CONV_5_6]]
-; CHECK-NEXT:    [[ADD11_5_6:%.*]] = add i32 [[MUL_5_6]], [[ADD11_4_6]]
-; CHECK-NEXT:    [[ARRAYIDX_6_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 6
-; CHECK-NEXT:    [[TMP54:%.*]] = load i16, i16* [[ARRAYIDX_6_6]], align 2
-; CHECK-NEXT:    [[CONV_6_6:%.*]] = zext i16 [[TMP54]] to i32
-; CHECK-NEXT:    [[ADD_6_6:%.*]] = add nuw nsw i32 [[ADD_5_6]], [[CONV_6_6]]
-; CHECK-NEXT:    [[MUL_6_6:%.*]] = mul nuw nsw i32 [[CONV_6_6]], [[CONV_6_6]]
-; CHECK-NEXT:    [[ADD11_6_6:%.*]] = add i32 [[MUL_6_6]], [[ADD11_5_6]]
-; CHECK-NEXT:    [[ARRAYIDX_7_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 7
-; CHECK-NEXT:    [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX_7_6]], align 2
-; CHECK-NEXT:    [[CONV_7_6:%.*]] = zext i16 [[TMP55]] to i32
-; CHECK-NEXT:    [[ADD_7_6:%.*]] = add nuw nsw i32 [[ADD_6_6]], [[CONV_7_6]]
-; CHECK-NEXT:    [[MUL_7_6:%.*]] = mul nuw nsw i32 [[CONV_7_6]], [[CONV_7_6]]
-; CHECK-NEXT:    [[ADD11_7_6:%.*]] = add i32 [[MUL_7_6]], [[ADD11_6_6]]
 ; CHECK-NEXT:    [[ADD_PTR_6:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_5]], i64 [[IDX_EXT]]
-; CHECK-NEXT:    [[TMP56:%.*]] = load i16, i16* [[ADD_PTR_6]], align 2
-; CHECK-NEXT:    [[CONV_764:%.*]] = zext i16 [[TMP56]] to i32
-; CHECK-NEXT:    [[ADD_765:%.*]] = add nuw nsw i32 [[ADD_7_6]], [[CONV_764]]
-; CHECK-NEXT:    [[MUL_766:%.*]] = mul nuw nsw i32 [[CONV_764]], [[CONV_764]]
-; CHECK-NEXT:    [[ADD11_767:%.*]] = add i32 [[MUL_766]], [[ADD11_7_6]]
-; CHECK-NEXT:    [[ARRAYIDX_1_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 1
-; CHECK-NEXT:    [[TMP57:%.*]] = load i16, i16* [[ARRAYIDX_1_7]], align 2
-; CHECK-NEXT:    [[CONV_1_7:%.*]] = zext i16 [[TMP57]] to i32
-; CHECK-NEXT:    [[ADD_1_7:%.*]] = add nuw nsw i32 [[ADD_765]], [[CONV_1_7]]
-; CHECK-NEXT:    [[MUL_1_7:%.*]] = mul nuw nsw i32 [[CONV_1_7]], [[CONV_1_7]]
-; CHECK-NEXT:    [[ADD11_1_7:%.*]] = add i32 [[MUL_1_7]], [[ADD11_767]]
-; CHECK-NEXT:    [[ARRAYIDX_2_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 2
-; CHECK-NEXT:    [[TMP58:%.*]] = load i16, i16* [[ARRAYIDX_2_7]], align 2
-; CHECK-NEXT:    [[CONV_2_7:%.*]] = zext i16 [[TMP58]] to i32
-; CHECK-NEXT:    [[ADD_2_7:%.*]] = add nuw nsw i32 [[ADD_1_7]], [[CONV_2_7]]
-; CHECK-NEXT:    [[MUL_2_7:%.*]] = mul nuw nsw i32 [[CONV_2_7]], [[CONV_2_7]]
-; CHECK-NEXT:    [[ADD11_2_7:%.*]] = add i32 [[MUL_2_7]], [[ADD11_1_7]]
-; CHECK-NEXT:    [[ARRAYIDX_3_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 3
-; CHECK-NEXT:    [[TMP59:%.*]] = load i16, i16* [[ARRAYIDX_3_7]], align 2
-; CHECK-NEXT:    [[CONV_3_7:%.*]] = zext i16 [[TMP59]] to i32
-; CHECK-NEXT:    [[ADD_3_7:%.*]] = add nuw nsw i32 [[ADD_2_7]], [[CONV_3_7]]
-; CHECK-NEXT:    [[MUL_3_7:%.*]] = mul nuw nsw i32 [[CONV_3_7]], [[CONV_3_7]]
-; CHECK-NEXT:    [[ADD11_3_7:%.*]] = add i32 [[MUL_3_7]], [[ADD11_2_7]]
-; CHECK-NEXT:    [[ARRAYIDX_4_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 4
-; CHECK-NEXT:    [[TMP60:%.*]] = load i16, i16* [[ARRAYIDX_4_7]], align 2
-; CHECK-NEXT:    [[CONV_4_7:%.*]] = zext i16 [[TMP60]] to i32
-; CHECK-NEXT:    [[ADD_4_7:%.*]] = add nuw nsw i32 [[ADD_3_7]], [[CONV_4_7]]
-; CHECK-NEXT:    [[MUL_4_7:%.*]] = mul nuw nsw i32 [[CONV_4_7]], [[CONV_4_7]]
-; CHECK-NEXT:    [[ADD11_4_7:%.*]] = add i32 [[MUL_4_7]], [[ADD11_3_7]]
-; CHECK-NEXT:    [[ARRAYIDX_5_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 5
-; CHECK-NEXT:    [[TMP61:%.*]] = load i16, i16* [[ARRAYIDX_5_7]], align 2
-; CHECK-NEXT:    [[CONV_5_7:%.*]] = zext i16 [[TMP61]] to i32
-; CHECK-NEXT:    [[ADD_5_7:%.*]] = add nuw nsw i32 [[ADD_4_7]], [[CONV_5_7]]
-; CHECK-NEXT:    [[MUL_5_7:%.*]] = mul nuw nsw i32 [[CONV_5_7]], [[CONV_5_7]]
-; CHECK-NEXT:    [[ADD11_5_7:%.*]] = add i32 [[MUL_5_7]], [[ADD11_4_7]]
-; CHECK-NEXT:    [[ARRAYIDX_6_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 6
-; CHECK-NEXT:    [[TMP62:%.*]] = load i16, i16* [[ARRAYIDX_6_7]], align 2
-; CHECK-NEXT:    [[CONV_6_7:%.*]] = zext i16 [[TMP62]] to i32
-; CHECK-NEXT:    [[ADD_6_7:%.*]] = add nuw nsw i32 [[ADD_5_7]], [[CONV_6_7]]
-; CHECK-NEXT:    [[MUL_6_7:%.*]] = mul nuw nsw i32 [[CONV_6_7]], [[CONV_6_7]]
-; CHECK-NEXT:    [[ADD11_6_7:%.*]] = add i32 [[MUL_6_7]], [[ADD11_5_7]]
-; CHECK-NEXT:    [[ARRAYIDX_7_7:%.*]] = getelementptr inbounds i16, i16* [[ADD_PTR_6]], i64 7
-; CHECK-NEXT:    [[TMP63:%.*]] = load i16, i16* [[ARRAYIDX_7_7]], align 2
-; CHECK-NEXT:    [[CONV_7_7:%.*]] = zext i16 [[TMP63]] to i32
-; CHECK-NEXT:    [[ADD_7_7:%.*]] = add nuw nsw i32 [[ADD_6_7]], [[CONV_7_7]]
-; CHECK-NEXT:    [[MUL_7_7:%.*]] = mul nuw nsw i32 [[CONV_7_7]], [[CONV_7_7]]
-; CHECK-NEXT:    [[ADD11_7_7:%.*]] = add i32 [[MUL_7_7]], [[ADD11_6_7]]
-; CHECK-NEXT:    [[CONV15:%.*]] = zext i32 [[ADD_7_7]] to i64
-; CHECK-NEXT:    [[CONV16:%.*]] = zext i32 [[ADD11_7_7]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[P]] to <8 x i16>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[ADD_PTR]] to <8 x i16>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i16>, <8 x i16>* [[TMP2]], align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[ADD_PTR_1]] to <8 x i16>*
+; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[ADD_PTR_2]] to <8 x i16>*
+; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[TMP6]], align 2
+; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[ADD_PTR_3]] to <8 x i16>*
+; CHECK-NEXT:    [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[TMP8]], align 2
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[ADD_PTR_4]] to <8 x i16>*
+; CHECK-NEXT:    [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[TMP10]], align 2
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16* [[ADD_PTR_5]] to <8 x i16>*
+; CHECK-NEXT:    [[TMP13:%.*]] = load <8 x i16>, <8 x i16>* [[TMP12]], align 2
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i16* [[ADD_PTR_6]] to <8 x i16>*
+; CHECK-NEXT:    [[TMP15:%.*]] = load <8 x i16>, <8 x i16>* [[TMP14]], align 2
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <64 x i16> [[TMP16]], <64 x i16> [[TMP17]], <64 x i32>
+; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <64 x i16> [[TMP18]], <64 x i16> [[TMP19]], <64 x i32>
+; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <64 x i16> [[TMP20]], <64 x i16> [[TMP21]], <64 x i32>
+; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <8 x i16> [[TMP9]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <64 x i16> [[TMP22]], <64 x i16> [[TMP23]], <64 x i32>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <8 x i16> [[TMP11]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <64 x i16> [[TMP24]], <64 x i16> [[TMP25]], <64 x i32>
+; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <8 x i16> [[TMP13]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <64 x i16> [[TMP26]], <64 x i16> [[TMP27]], <64 x i32>
+; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <8 x i16> [[TMP15]], <8 x i16> poison, <64 x i32>
+; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <64 x i16> [[TMP28]], <64 x i16> [[TMP29]], <64 x i32>
+; CHECK-NEXT:    [[TMP31:%.*]] = zext <64 x i16> [[TMP30]] to <64 x i32>
+; CHECK-NEXT:    [[TMP32:%.*]] = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> [[TMP31]])
+; CHECK-NEXT:    [[TMP33:%.*]] = mul nuw nsw <64 x i32> [[TMP31]], [[TMP31]]
+; CHECK-NEXT:    [[TMP34:%.*]] = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> [[TMP33]])
+; CHECK-NEXT:    [[CONV15:%.*]] = zext i32 [[TMP32]] to i64
+; CHECK-NEXT:    [[CONV16:%.*]] = zext i32 [[TMP34]] to i64
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i64 [[CONV16]], 32
 ; CHECK-NEXT:    [[ADD17:%.*]] = or i64 [[SHL]], [[CONV15]]
 ; CHECK-NEXT:    ret i64 [[ADD17]]
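For reference, the @straight test above corresponds roughly to the following source, reconstructed from the loop sketched in the test's comment; the function shape, types, and the st row stride are inferred from the IR rather than taken from the test suite:

    // Eight rows of eight i16 elements, rows st apart: both accumulators are
    // horizontal reductions over the same 64 loads, which is exactly the
    // "mutual reduction" shape the patch pairs up before reducing.
    unsigned long long straight(const unsigned short *p, int st) {
      unsigned sm = 0, sq = 0;
      for (int i = 0; i < 8; ++i, p += st)
        for (int j = 0; j < 8; ++j) {
          sm += p[j];
          sq += (unsigned)p[j] * p[j];
        }
      return ((unsigned long long)sq << 32) | sm;
    }

With the look-ahead in place, the second reduction (sq) is matched before the first (sm) is committed, both sets of ops land in the HorizontalReductions cache, and the tree is costed without the cross-reduction extract penalty, yielding the two @llvm.vector.reduce.add.v64i32 calls over shared <64 x i16> loads seen in the new CHECK lines.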