Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -680,6 +680,11 @@
   /// size of the widest element type.
   bool shouldMaximizeVectorBandwidth(bool OptSize) const;
 
+  /// \return The minimum vectorization factor for types of given element
+  /// bit width, or 0 if there is no minimum VF. The returned value only
+  /// applies when shouldMaximizeVectorBandwidth returns true.
+  unsigned getMinimumVF(unsigned ElemWidth) const;
+
   /// \return True if it should be considered for address type promotion.
   /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
   /// profitable without finding other extensions fed by the same input.
@@ -1074,6 +1079,7 @@
   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
   virtual unsigned getMinVectorRegisterBitWidth() = 0;
   virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
+  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
   virtual bool shouldConsiderAddressTypePromotion(
       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
   virtual unsigned getCacheLineSize() = 0;
@@ -1373,6 +1379,9 @@
   bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
     return Impl.shouldMaximizeVectorBandwidth(OptSize);
   }
+  unsigned getMinimumVF(unsigned ElemWidth) const override {
+    return Impl.getMinimumVF(ElemWidth);
+  }
   bool shouldConsiderAddressTypePromotion(
       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
     return Impl.shouldConsiderAddressTypePromotion(
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -353,6 +353,8 @@
 
   bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
 
+  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
+
   bool shouldConsiderAddressTypePromotion(const Instruction &I,
                                           bool &AllowPromotionWithoutCommonHeader) {
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -344,6 +344,10 @@
   return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
 }
 
+unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
+  return TTIImpl->getMinimumVF(ElemWidth);
+}
+
 bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
     const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
   return TTIImpl->shouldConsiderAddressTypePromotion(
Index: lib/Target/Hexagon/HexagonTargetTransformInfo.h
===================================================================
--- lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -74,6 +74,7 @@
   unsigned getRegisterBitWidth(bool Vector) const;
   unsigned getMinVectorRegisterBitWidth() const;
   bool shouldMaximizeVectorBandwidth(bool OptSize) const { return true; }
+  unsigned getMinimumVF(unsigned ElemWidth) const;
 
   bool supportsEfficientVectorElementLoadStore() {
     return false;
Index: lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
===================================================================
--- lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -76,6 +76,10 @@
   return getST()->useHVXOps() ?
      getST()->getVectorLength()*8 : 0;
 }
 
+unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
+  return (8 * getST()->getVectorLength()) / ElemWidth;
+}
+
 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
     unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
   if (Opcode == Instruction::Load && Src->isVectorTy()) {
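For the HVX configuration exercised by the test below (+hvx-length64b), getST()->getVectorLength() is assumed to return 64 bytes, i.e. 512-bit vector registers. A minimal sketch of the resulting arithmetic (illustrative only, not part of the patch):

  // Assumes a 64-byte HVX vector register, per the test's target-features.
  unsigned hexagonMinimumVF(unsigned ElemWidth) {
    const unsigned VecLenBytes = 64;      // assumed getVectorLength() result
    return (8 * VecLenBytes) / ElemWidth; // i8 -> 64, i16 -> 32, i32 -> 16
  }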
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6160,6 +6160,13 @@
         break;
       }
     }
+    if (unsigned MinVF = TTI.getMinimumVF(SmallestType)) {
+      if (MaxVF < MinVF) {
+        DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF
+                     << ") with target's minimum: " << MinVF << '\n');
+        MaxVF = MinVF;
+      }
+    }
   }
   return MaxVF;
 }
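Note that the new clamp only ever raises MaxVF, and a minimum of 0 (the TargetTransformInfoImplBase default) leaves the calculated value untouched. A standalone sketch of the rule, with illustrative names:

  // MinVF == 0 means the target imposes no minimum; otherwise the larger
  // of the two values wins.
  unsigned applyTargetMinimumVF(unsigned MaxVF, unsigned MinVF) {
    return (MinVF != 0 && MaxVF < MinVF) ? MinVF : MaxVF;
  }

For the test below, applyTargetMinimumVF(9, 64) yields 64, which is what the CHECK line expects.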
Index: test/Transforms/LoopVectorize/Hexagon/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/Hexagon/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'Hexagon' in config.root.targets:
+    config.unsupported = True
Index: test/Transforms/LoopVectorize/Hexagon/minimum-vf.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/Hexagon/minimum-vf.ll
@@ -0,0 +1,439 @@
+; RUN: opt -march=hexagon -loop-vectorize -hexagon-autohvx -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; Check that TTI::getMinimumVF works.
+; CHECK: LV: Overriding calculated MaxVF(9) with target's minimum: 64
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+%s.0 = type { i8*, i32, i32, i32, i32 }
+
+@g0 = external dso_local local_unnamed_addr global %s.0**, align 4
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+define hidden fastcc void @f0(i8* nocapture %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i8 zeroext %a5) unnamed_addr #1 {
+b0:
+  %v0 = alloca [4 x [9 x i16]], align 8
+  %v1 = bitcast [4 x [9 x i16]]* %v0 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 72, i8* nonnull %v1) #2
+  %v2 = add i32 %a1, -2
+  %v3 = add i32 %a3, -9
+  %v4 = icmp ugt i32 %v2, %v3
+  %v5 = add i32 %a2, -2
+  %v6 = add i32 %a4, -9
+  %v7 = icmp ugt i32 %v5, %v6
+  %v8 = or i1 %v4, %v7
+  %v9 = load %s.0**, %s.0*** @g0, align 4, !tbaa !1
+  %v10 = zext i8 %a5 to i32
+  %v11 = getelementptr inbounds %s.0*, %s.0** %v9, i32 %v10
+  %v12 = load %s.0*, %s.0** %v11, align 4, !tbaa !1
+  %v13 = getelementptr inbounds %s.0, %s.0* %v12, i32 0, i32 0
+  %v14 = load i8*, i8** %v13, align 4, !tbaa !5
+  br i1 %v8, label %b3, label %b1
+
+b1:                                               ; preds = %b0
+  %v15 = mul i32 %v5, %a3
+  %v16 = getelementptr inbounds i8, i8* %v14, i32 %v15
+  %v17 = getelementptr inbounds i8, i8* %v16, i32 %v2
+  br label %b2
+
+b2:                                               ; preds = %b2, %b1
+  %v18 = phi i32 [ 0, %b1 ], [ %v83, %b2 ]
+  %v19 = phi i8* [ %v17, %b1 ], [ %v82, %b2 ]
+  %v20 = load i8, i8* %v19, align 1, !tbaa !8
+  %v21 = zext i8 %v20 to i32
+  %v22 = getelementptr inbounds i8, i8* %v19, i32 1
+  %v23 = load i8, i8* %v22, align 1, !tbaa !8
+  %v24 = zext i8 %v23 to i32
+  %v25 = getelementptr inbounds i8, i8* %v19, i32 2
+  %v26 = load i8, i8* %v25, align 1, !tbaa !8
+  %v27 = zext i8 %v26 to i32
+  %v28 = getelementptr inbounds i8, i8* %v19, i32 3
+  %v29 = load i8, i8* %v28, align 1, !tbaa !8
+  %v30 = zext i8 %v29 to i32
+  %v31 = getelementptr inbounds i8, i8* %v19, i32 4
+  %v32 = load i8, i8* %v31, align 1, !tbaa !8
+  %v33 = zext i8 %v32 to i32
+  %v34 = getelementptr inbounds i8, i8* %v19, i32 5
+  %v35 = load i8, i8* %v34, align 1, !tbaa !8
+  %v36 = zext i8 %v35 to i32
+  %v37 = getelementptr inbounds i8, i8* %v19, i32 6
+  %v38 = load i8, i8* %v37, align 1, !tbaa !8
+  %v39 = zext i8 %v38 to i32
+  %v40 = getelementptr inbounds i8, i8* %v19, i32 7
+  %v41 = load i8, i8* %v40, align 1, !tbaa !8
+  %v42 = zext i8 %v41 to i32
+  %v43 = getelementptr inbounds i8, i8* %v19, i32 8
+  %v44 = load i8, i8* %v43, align 1, !tbaa !8
+  %v45 = zext i8 %v44 to i32
+  %v46 = add nuw nsw i32 %v33, %v24
+  %v47 = mul nsw i32 %v46, -5
+  %v48 = add nuw nsw i32 %v30, %v27
+  %v49 = mul nuw nsw i32 %v48, 20
+  %v50 = add nuw nsw i32 %v49, %v21
+  %v51 = add nuw nsw i32 %v50, %v36
+  %v52 = add nsw i32 %v51, %v47
+  %v53 = trunc i32 %v52 to i16
+  %v54 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 0, i32 %v18
+  store i16 %v53, i16* %v54, align 2, !tbaa !9
+  %v55 = add nuw nsw i32 %v36, %v27
+  %v56 = mul nsw i32 %v55, -5
+  %v57 = add nuw nsw i32 %v33, %v30
+  %v58 = mul nuw nsw i32 %v57, 20
+  %v59 = add nuw nsw i32 %v58, %v24
+  %v60 = add nuw nsw i32 %v59, %v39
+  %v61 = add nsw i32 %v60, %v56
+  %v62 = trunc i32 %v61 to i16
+  %v63 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 1, i32 %v18
+  store i16 %v62, i16* %v63, align 2, !tbaa !9
+  %v64 = add nuw nsw i32 %v39, %v30
+  %v65 = mul nsw i32 %v64, -5
+  %v66 = add nuw nsw i32 %v36, %v33
+  %v67 = mul nuw nsw i32 %v66, 20
+  %v68 = add nuw nsw i32 %v67, %v27
+  %v69 = add nuw nsw i32 %v68, %v42
+  %v70 = add nsw i32 %v69, %v65
+  %v71 = trunc i32 %v70 to i16
+  %v72 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 2, i32 %v18
+  store i16 %v71, i16* %v72, align 2, !tbaa !9
+  %v73 = add nuw nsw i32 %v42, %v33
+  %v74 = mul nsw i32 %v73, -5
+  %v75 = add nuw nsw i32 %v39, %v36
+  %v76 = mul nuw nsw i32 %v75, 20
+  %v77 = add nuw nsw i32 %v76, %v30
+  %v78 = add nuw nsw i32 %v77, %v45
+  %v79 = add nsw i32 %v78, %v74
+  %v80 = trunc i32 %v79 to i16
+  %v81 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 3, i32 %v18
+  store i16 %v80, i16* %v81, align 2, !tbaa !9
+  %v82 = getelementptr inbounds i8, i8* %v19, i32 %a3
+  %v83 = add nuw nsw i32 %v18, 1
+  %v84 = icmp eq i32 %v83, 9
+  br i1 %v84, label %b6, label %b2
+
+b3:                                               ; preds = %b0
+  %v85 = add nsw i32 %a3, -1
+  %v86 = add nsw i32 %a4, -1
+  %v87 = add i32 %a1, 1
+  %v88 = add i32 %a1, 2
+  %v89 = icmp slt i32 %v87, 0
+  %v90 = icmp slt i32 %v87, %a3
+  %v91 = select i1 %v90, i32 %v87, i32 %v85
+  %v92 = select i1 %v89, i32 0, i32 %v91
+  %v93 = add i32 %a1, -1
+  %v94 = icmp slt i32 %v2, 0
+  %v95 = icmp slt i32 %v2, %a3
+  %v96 = select i1 %v95, i32 %v2, i32 %v85
+  %v97 = select i1 %v94, i32 0, i32 %v96
+  %v98 = icmp slt i32 %v93, 0
+  %v99 = icmp slt i32 %v93, %a3
+  %v100 = select i1 %v99, i32 %v93, i32 %v85
+  %v101 = select i1 %v98, i32 0, i32 %v100
+  %v102 = add nsw i32 %a1, 1
+  %v103 = icmp slt i32 %a1, 0
+  %v104 = icmp slt i32 %a1, %a3
+  %v105 = select i1 %v104, i32 %a1, i32 %v85
+  %v106 = select i1 %v103, i32 0, i32 %v105
+  %v107 = add nsw i32 %a1, 2
+  %v108 = icmp slt i32 %a1, -1
+  %v109 = icmp slt i32 %v102, %a3
+  %v110 = select i1 %v109, i32 %v102, i32 %v85
+  %v111 = select i1 %v108, i32 0, i32 %v110
+  %v112 = add nsw i32 %a1, 3
+  %v113 = icmp slt i32 %a1, -2
+  %v114 = icmp slt i32 %v107, %a3
+  %v115 = select i1 %v114, i32 %v107, i32 %v85
+  %v116 = select i1 %v113, i32 0, i32 %v115
+  %v117 = icmp slt i32 %a1, -3
+  %v118 = icmp slt i32 %v112, %a3
+  %v119 = select i1 %v118, i32 %v112, i32 %v85
+  %v120 = select i1 %v117, i32 0, i32 %v119
+  %v121 = add nsw i32 %a1, 4
+  %v122 = icmp slt i32 %a1, -4
+  %v123 = icmp slt i32 %v121, %a3
+  %v124 = select i1 %v123, i32 %v121, i32 %v85
+  %v125 = select i1 %v122, i32 0, i32 %v124
+  %v126 = add nsw i32 %a1, 5
+  %v127 = icmp slt i32 %a1, -5
+  %v128 = icmp slt i32 %v126, %a3
+  %v129 = select i1 %v128, i32 %v126, i32 %v85
+  %v130 = select i1 %v127, i32 0, i32 %v129
+  %v131 = add i32 %a1, 3
+  %v132 = icmp slt i32 %v88, 0
+  %v133 = icmp slt i32 %v88, %a3
+  %v134 = select i1 %v133, i32 %v88, i32 %v85
+  %v135 = select i1 %v132, i32 0, i32 %v134
+  %v136 = add i32 %a1, 4
+  %v137 = icmp slt i32 %v131, 0
+  %v138 = icmp slt i32 %v131, %a3
+  %v139 = select i1 %v138, i32 %v131, i32 %v85
+  %v140 = select i1 %v137, i32 0, i32 %v139
+  %v141 = add i32 %a1, 5
+  %v142 = icmp slt i32 %v136, 0
+  %v143 = icmp slt i32 %v136, %a3
+  %v144 = select i1 %v143, i32 %v136, i32 %v85
+  %v145 = select i1 %v142, i32 0, i32 %v144
+  %v146 = add i32 %a1, 6
+  %v147 = icmp slt i32 %v141, 0
+  %v148 = icmp slt i32 %v141, %a3
+  %v149 = select i1 %v148, i32 %v141, i32 %v85
+  %v150 = select i1 %v147, i32 0, i32 %v149
+  %v151 = icmp slt i32 %v146, 0
+  %v152 = icmp slt i32 %v146, %a3
+  %v153 = select i1 %v152, i32 %v146, i32 %v85
+  %v154 = select i1 %v151, i32 0, i32 %v153
+  br label %b4
+
+b4:                                               ; preds = %b4, %b3
+  %v155 = phi i32 [ 0, %b3 ], [ %v259, %b4 ]
+  %v156 = add i32 %v5, %v155
+  %v157 = icmp slt i32 %v156, 0
+  %v158 = icmp slt i32 %v156, %a4
+  %v159 = select i1 %v158, i32 %v156, i32 %v86
+  %v160 = select i1 %v157, i32 0, i32 %v159
+  %v161 = mul i32 %v160, %a3
+  %v162 = add i32 %v97, %v161
+  %v163 = getelementptr inbounds i8, i8* %v14, i32 %v162
+  %v164 = load i8, i8* %v163, align 1, !tbaa !8
+  %v165 = zext i8 %v164 to i32
+  %v166 = add i32 %v101, %v161
+  %v167 = getelementptr inbounds i8, i8* %v14, i32 %v166
+  %v168 = load i8, i8* %v167, align 1, !tbaa !8
+  %v169 = zext i8 %v168 to i32
+  %v170 = mul nsw i32 %v169, -5
+  %v171 = add nsw i32 %v170, %v165
+  %v172 = add i32 %v106, %v161
+  %v173 = getelementptr inbounds i8, i8* %v14, i32 %v172
+  %v174 = load i8, i8* %v173, align 1, !tbaa !8
+  %v175 = zext i8 %v174 to i32
+  %v176 = mul nuw nsw i32 %v175, 20
+  %v177 = add nsw i32 %v176, %v171
+  %v178 = add i32 %v111, %v161
+  %v179 = getelementptr inbounds i8, i8* %v14, i32 %v178
+  %v180 = load i8, i8* %v179, align 1, !tbaa !8
+  %v181 = zext i8 %v180 to i32
+  %v182 = mul nuw nsw i32 %v181, 20
+  %v183 = add nsw i32 %v182, %v177
+  %v184 = add i32 %v116, %v161
+  %v185 = getelementptr inbounds i8, i8* %v14, i32 %v184
+  %v186 = load i8, i8* %v185, align 1, !tbaa !8
+  %v187 = zext i8 %v186 to i32
+  %v188 = mul nsw i32 %v187, -5
+  %v189 = add nsw i32 %v188, %v183
+  %v190 = add i32 %v120, %v161
+  %v191 = getelementptr inbounds i8, i8* %v14, i32 %v190
+  %v192 = load i8, i8* %v191, align 1, !tbaa !8
+  %v193 = zext i8 %v192 to i32
+  %v194 = add nsw i32 %v189, %v193
+  %v195 = trunc i32 %v194 to i16
+  %v196 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 0, i32 %v155
+  store i16 %v195, i16* %v196, align 2, !tbaa !9
+  %v197 = mul nsw i32 %v175, -5
+  %v198 = add nsw i32 %v197, %v169
+  %v199 = add nsw i32 %v182, %v198
+  %v200 = mul nuw nsw i32 %v187, 20
+  %v201 = add nsw i32 %v200, %v199
+  %v202 = mul nsw i32 %v193, -5
+  %v203 = add nsw i32 %v202, %v201
+  %v204 = add i32 %v125, %v161
+  %v205 = getelementptr inbounds i8, i8* %v14, i32 %v204
+  %v206 = load i8, i8* %v205, align 1, !tbaa !8
+  %v207 = zext i8 %v206 to i32
+  %v208 = add nsw i32 %v203, %v207
+  %v209 = trunc i32 %v208 to i16
+  %v210 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 1, i32 %v155
+  store i16 %v209, i16* %v210, align 2, !tbaa !9
+  %v211 = mul nsw i32 %v181, -5
+  %v212 = add nsw i32 %v211, %v175
+  %v213 = add nsw i32 %v200, %v212
+  %v214 = mul nuw nsw i32 %v193, 20
+  %v215 = add nsw i32 %v214, %v213
+  %v216 = mul nsw i32 %v207, -5
+  %v217 = add nsw i32 %v216, %v215
+  %v218 = add i32 %v130, %v161
+  %v219 = getelementptr inbounds i8, i8* %v14, i32 %v218
+  %v220 = load i8, i8* %v219, align 1, !tbaa !8
+  %v221 = zext i8 %v220 to i32
+  %v222 = add nsw i32 %v217, %v221
+  %v223 = trunc i32 %v222 to i16
+  %v224 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 2, i32 %v155
+  store i16 %v223, i16* %v224, align 2, !tbaa !9
+  %v225 = add i32 %v92, %v161
+  %v226 = getelementptr inbounds i8, i8* %v14, i32 %v225
+  %v227 = load i8, i8* %v226, align 1, !tbaa !8
+  %v228 = zext i8 %v227 to i16
+  %v229 = add i32 %v135, %v161
+  %v230 = getelementptr inbounds i8, i8* %v14, i32 %v229
+  %v231 = load i8, i8* %v230, align 1, !tbaa !8
+  %v232 = zext i8 %v231 to i16
+  %v233 = mul nsw i16 %v232, -5
+  %v234 = add nsw i16 %v233, %v228
+  %v235 = add i32 %v140, %v161
+  %v236 = getelementptr inbounds i8, i8* %v14, i32 %v235
+  %v237 = load i8, i8* %v236, align 1, !tbaa !8
+  %v238 = zext i8 %v237 to i16
+  %v239 = mul nuw nsw i16 %v238, 20
+  %v240 = add nsw i16 %v239, %v234
+  %v241 = add i32 %v145, %v161
+  %v242 = getelementptr inbounds i8, i8* %v14, i32 %v241
+  %v243 = load i8, i8* %v242, align 1, !tbaa !8
+  %v244 = zext i8 %v243 to i16
+  %v245 = mul nuw nsw i16 %v244, 20
+  %v246 = add i16 %v245, %v240
+  %v247 = add i32 %v150, %v161
+  %v248 = getelementptr inbounds i8, i8* %v14, i32 %v247
+  %v249 = load i8, i8* %v248, align 1, !tbaa !8
+  %v250 = zext i8 %v249 to i16
+  %v251 = mul nsw i16 %v250, -5
+  %v252 = add i16 %v251, %v246
+  %v253 = add i32 %v154, %v161
+  %v254 = getelementptr inbounds i8, i8* %v14, i32 %v253
+  %v255 = load i8, i8* %v254, align 1, !tbaa !8
+  %v256 = zext i8 %v255 to i16
+  %v257 = add i16 %v252, %v256
+  %v258 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 3, i32 %v155
+  store i16 %v257, i16* %v258, align 2, !tbaa !9
+  %v259 = add nuw nsw i32 %v155, 1
+  %v260 = icmp eq i32 %v259, 9
+  br i1 %v260, label %b5, label %b4
+
+b5:                                               ; preds = %b4
+  br label %b7
+
+b6:                                               ; preds = %b2
+  br label %b7
+
+b7:                                               ; preds = %b6, %b5
+  br label %b8
+
+b8:                                               ; preds = %b8, %b7
+  %v261 = phi i8* [ %a0, %b7 ], [ %v353, %b8 ]
+  %v262 = phi i32 [ 0, %b7 ], [ %v354, %b8 ]
+  %v263 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 0
+  %v264 = load i16, i16* %v263, align 2, !tbaa !9
+  %v265 = sext i16 %v264 to i32
+  %v266 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 1
+  %v267 = load i16, i16* %v266, align 2, !tbaa !9
+  %v268 = sext i16 %v267 to i32
+  %v269 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 2
+  %v270 = load i16, i16* %v269, align 2, !tbaa !9
+  %v271 = sext i16 %v270 to i32
+  %v272 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 3
+  %v273 = load i16, i16* %v272, align 2, !tbaa !9
+  %v274 = sext i16 %v273 to i32
+  %v275 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 4
+  %v276 = load i16, i16* %v275, align 2, !tbaa !9
+  %v277 = sext i16 %v276 to i32
+  %v278 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 5
+  %v279 = load i16, i16* %v278, align 2, !tbaa !9
+  %v280 = sext i16 %v279 to i32
+  %v281 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 6
+  %v282 = load i16, i16* %v281, align 2, !tbaa !9
+  %v283 = sext i16 %v282 to i32
+  %v284 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 7
+  %v285 = load i16, i16* %v284, align 2, !tbaa !9
+  %v286 = sext i16 %v285 to i32
+  %v287 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 8
+  %v288 = load i16, i16* %v287, align 2, !tbaa !9
+  %v289 = sext i16 %v288 to i32
+  %v290 = add nsw i32 %v277, %v268
+  %v291 = mul nsw i32 %v290, -5
+  %v292 = add nsw i32 %v274, %v271
+  %v293 = mul nsw i32 %v292, 20
+  %v294 = add nsw i32 %v293, %v265
+  %v295 = add nsw i32 %v294, %v280
+  %v296 = add nsw i32 %v295, %v291
+  %v297 = add nsw i32 %v296, 512
+  %v298 = lshr i32 %v297, 10
+  %v299 = and i32 %v297, 262144
+  %v300 = icmp eq i32 %v299, 0
+  %v301 = icmp slt i32 %v296, -512
+  %v302 = select i1 %v301, i32 0, i32 255
+  %v303 = select i1 %v300, i32 %v298, i32 %v302
+  %v304 = trunc i32 %v303 to i8
+  store i8 %v304, i8* %v261, align 1, !tbaa !8
+  %v305 = add nsw i32 %v280, %v271
+  %v306 = mul nsw i32 %v305, -5
+  %v307 = add nsw i32 %v277, %v274
+  %v308 = mul nsw i32 %v307, 20
+  %v309 = add nsw i32 %v308, %v268
+  %v310 = add nsw i32 %v309, %v283
+  %v311 = add nsw i32 %v310, %v306
+  %v312 = add nsw i32 %v311, 512
+  %v313 = lshr i32 %v312, 10
+  %v314 = and i32 %v312, 262144
+  %v315 = icmp eq i32 %v314, 0
+  %v316 = icmp slt i32 %v311, -512
+  %v317 = select i1 %v316, i32 0, i32 255
+  %v318 = select i1 %v315, i32 %v313, i32 %v317
+  %v319 = trunc i32 %v318 to i8
+  %v320 = getelementptr inbounds i8, i8* %v261, i32 4
+  store i8 %v319, i8* %v320, align 1, !tbaa !8
+  %v321 = add nsw i32 %v283, %v274
+  %v322 = mul nsw i32 %v321, -5
+  %v323 = add nsw i32 %v280, %v277
+  %v324 = mul nsw i32 %v323, 20
+  %v325 = add nsw i32 %v324, %v271
+  %v326 = add nsw i32 %v325, %v286
+  %v327 = add nsw i32 %v326, %v322
+  %v328 = add nsw i32 %v327, 512
+  %v329 = lshr i32 %v328, 10
+  %v330 = and i32 %v328, 262144
+  %v331 = icmp eq i32 %v330, 0
+  %v332 = icmp slt i32 %v327, -512
+  %v333 = select i1 %v332, i32 0, i32 255
+  %v334 = select i1 %v331, i32 %v329, i32 %v333
+  %v335 = trunc i32 %v334 to i8
+  %v336 = getelementptr inbounds i8, i8* %v261, i32 8
+  store i8 %v335, i8* %v336, align 1, !tbaa !8
+  %v337 = add nsw i32 %v286, %v277
+  %v338 = mul nsw i32 %v337, -5
+  %v339 = add nsw i32 %v283, %v280
+  %v340 = mul nsw i32 %v339, 20
+  %v341 = add nsw i32 %v340, %v274
+  %v342 = add nsw i32 %v341, %v289
+  %v343 = add nsw i32 %v342, %v338
+  %v344 = add nsw i32 %v343, 512
+  %v345 = lshr i32 %v344, 10
+  %v346 = and i32 %v344, 262144
+  %v347 = icmp eq i32 %v346, 0
+  %v348 = icmp slt i32 %v343, -512
+  %v349 = select i1 %v348, i32 0, i32 255
+  %v350 = select i1 %v347, i32 %v345, i32 %v349
+  %v351 = trunc i32 %v350 to i8
+  %v352 = getelementptr inbounds i8, i8* %v261, i32 12
+  store i8 %v351, i8* %v352, align 1, !tbaa !8
+  %v353 = getelementptr inbounds i8, i8* %v261, i32 1
+  %v354 = add nuw nsw i32 %v262, 1
+  %v355 = icmp eq i32 %v354, 4
+  br i1 %v355, label %b9, label %b8
+
+b9:                                               ; preds = %b8
+  call void @llvm.lifetime.end.p0i8(i64 72, i8* nonnull %v1) #2
+  ret void
+}
+
+attributes #0 = { argmemonly nounwind }
+attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !2, i64 0}
+!6 = !{!"", !2, i64 0, !7, i64 4, !7, i64 8, !7, i64 12, !7, i64 16}
+!7 = !{!"int", !3, i64 0}
+!8 = !{!3, !3, i64 0}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"short", !3, i64 0}
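As a sanity check of the CHECK line at the top of the test, a sketch under the stated assumptions: the narrowest element type the vectorizer records for the loop is i8 (the loops load i8 values), its calculated MaxVF is 9, and +hvx-length64b gives 512-bit registers.

  unsigned MinVF = (8 * 64) / 8;                  // getMinimumVF(8) == 64
  unsigned MaxVF = 9;                             // calculated by the vectorizer
  unsigned Final = MaxVF < MinVF ? MinVF : MaxVF; // 64, matching the CHECK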