Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -680,6 +680,11 @@
   /// size of the widest element type.
   bool shouldMaximizeVectorBandwidth(bool OptSize) const;
 
+  /// \return The minimum vectorization factor for types of given element
+  /// bit width, or 0 if there is no minimum VF. The returned value only
+  /// applies when shouldMaximizeVectorBandwidth returns true.
+  unsigned getMinimumVF(unsigned ElemWidth) const;
+
   /// \return True if it should be considered for address type promotion.
   /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
   /// profitable without finding other extensions fed by the same input.
@@ -1074,6 +1079,7 @@
   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
   virtual unsigned getMinVectorRegisterBitWidth() = 0;
   virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
+  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
   virtual bool shouldConsiderAddressTypePromotion(
       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
   virtual unsigned getCacheLineSize() = 0;
@@ -1373,6 +1379,9 @@
   bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
     return Impl.shouldMaximizeVectorBandwidth(OptSize);
   }
+  unsigned getMinimumVF(unsigned ElemWidth) const override {
+    return Impl.getMinimumVF(ElemWidth);
+  }
   bool shouldConsiderAddressTypePromotion(
       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
     return Impl.shouldConsiderAddressTypePromotion(
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -353,6 +353,8 @@
 
   bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
 
+  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
+
   bool shouldConsiderAddressTypePromotion(const Instruction &I,
                                           bool &AllowPromotionWithoutCommonHeader) {
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -344,6 +344,10 @@
   return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
 }
 
+unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
+  return TTIImpl->getMinimumVF(ElemWidth);
+}
+
 bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
     const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
   return TTIImpl->shouldConsiderAddressTypePromotion(
Index: lib/Target/Hexagon/HexagonTargetTransformInfo.h
===================================================================
--- lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -74,6 +74,7 @@
   unsigned getRegisterBitWidth(bool Vector) const;
   unsigned getMinVectorRegisterBitWidth() const;
   bool shouldMaximizeVectorBandwidth(bool OptSize) const { return true; }
+  unsigned getMinimumVF(unsigned ElemWidth) const;
 
   bool supportsEfficientVectorElementLoadStore() {
     return false;
Index: lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
===================================================================
--- lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -76,6 +76,10 @@
   return getST()->useHVXOps() ?
      getST()->getVectorLength()*8 : 0;
 }
 
+unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
+  return (8 * getST()->getVectorLength()) / ElemWidth;
+}
+
 unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
     unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
   if (Opcode == Instruction::Load && Src->isVectorTy()) {
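For the HVX configuration exercised by the test below (+hvx-length64b), getST()->getVectorLength() is assumed to return 64 bytes, i.e. 512-bit vector registers. A minimal sketch of the resulting arithmetic (illustrative only, not part of the patch):

  // Assumes a 64-byte HVX vector register, per the test's target-features.
  unsigned hexagonMinimumVF(unsigned ElemWidth) {
    const unsigned VecLenBytes = 64;      // assumed getVectorLength() result
    return (8 * VecLenBytes) / ElemWidth; // i8 -> 64, i16 -> 32, i32 -> 16
  }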
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6160,6 +6160,13 @@
         break;
       }
     }
+    if (unsigned MinVF = TTI.getMinimumVF(SmallestType)) {
+      if (MaxVF < MinVF) {
+        DEBUG(dbgs() << "LV: Overriding calculated MaxVF(" << MaxVF
+                     << ") with target's minimum: " << MinVF << '\n');
+        MaxVF = MinVF;
+      }
+    }
   }
   return MaxVF;
 }
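Note that the new clamp only ever raises MaxVF, and a minimum of 0 (the TargetTransformInfoImplBase default) leaves the calculated value untouched. A standalone sketch of the rule, with illustrative names:

  // MinVF == 0 means the target imposes no minimum; otherwise the larger
  // of the two values wins.
  unsigned applyTargetMinimumVF(unsigned MaxVF, unsigned MinVF) {
    return (MinVF != 0 && MaxVF < MinVF) ? MinVF : MaxVF;
  }

For the test below, applyTargetMinimumVF(9, 64) yields 64, which is what the CHECK line expects.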
Index: test/Transforms/LoopVectorize/Hexagon/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/Hexagon/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'Hexagon' in config.root.targets:
+    config.unsupported = True
Index: test/Transforms/LoopVectorize/Hexagon/minimum-vf.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/Hexagon/minimum-vf.ll
@@ -0,0 +1,439 @@
+; RUN: opt -march=hexagon -loop-vectorize -hexagon-autohvx -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; Check that TTI::getMinimumVF works.
+; CHECK: LV: Overriding calculated MaxVF(9) with target's minimum: 64
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+%s.0 = type { i8*, i32, i32, i32, i32 }
+
+@g0 = external dso_local local_unnamed_addr global %s.0**, align 4
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+define hidden fastcc void @f0(i8* nocapture %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i8 zeroext %a5) unnamed_addr #1 {
+b0:
+  %v0 = alloca [4 x [9 x i16]], align 8
+  %v1 = bitcast [4 x [9 x i16]]* %v0 to i8*
+  call void @llvm.lifetime.start.p0i8(i64 72, i8* nonnull %v1) #2
+  %v2 = add i32 %a1, -2
+  %v3 = add i32 %a3, -9
+  %v4 = icmp ugt i32 %v2, %v3
+  %v5 = add i32 %a2, -2
+  %v6 = add i32 %a4, -9
+  %v7 = icmp ugt i32 %v5, %v6
+  %v8 = or i1 %v4, %v7
+  %v9 = load %s.0**, %s.0*** @g0, align 4, !tbaa !1
+  %v10 = zext i8 %a5 to i32
+  %v11 = getelementptr inbounds %s.0*, %s.0** %v9, i32 %v10
+  %v12 = load %s.0*, %s.0** %v11, align 4, !tbaa !1
+  %v13 = getelementptr inbounds %s.0, %s.0* %v12, i32 0, i32 0
+  %v14 = load i8*, i8** %v13, align 4, !tbaa !5
+  br i1 %v8, label %b3, label %b1
+
+b1:                                               ; preds = %b0
+  %v15 = mul i32 %v5, %a3
+  %v16 = getelementptr inbounds i8, i8* %v14, i32 %v15
+  %v17 = getelementptr inbounds i8, i8* %v16, i32 %v2
+  br label %b2
+
+b2:                                               ; preds = %b2, %b1
+  %v18 = phi i32 [ 0, %b1 ], [ %v83, %b2 ]
+  %v19 = phi i8* [ %v17, %b1 ], [ %v82, %b2 ]
+  %v20 = load i8, i8* %v19, align 1, !tbaa !8
+  %v21 = zext i8 %v20 to i32
+  %v22 = getelementptr inbounds i8, i8* %v19, i32 1
+  %v23 = load i8, i8* %v22, align 1, !tbaa !8
+  %v24 = zext i8 %v23 to i32
+  %v25 = getelementptr inbounds i8, i8* %v19, i32 2
+  %v26 = load i8, i8* %v25, align 1, !tbaa !8
+  %v27 = zext i8 %v26 to i32
+  %v28 = getelementptr inbounds i8, i8* %v19, i32 3
+  %v29 = load i8, i8* %v28, align 1, !tbaa !8
+  %v30 = zext i8 %v29 to i32
+  %v31 = getelementptr inbounds i8, i8* %v19, i32 4
+  %v32 = load i8, i8* %v31, align 1, !tbaa !8
+  %v33 = zext i8 %v32 to i32
+  %v34 = getelementptr inbounds i8, i8* %v19, i32 5
+  %v35 = load i8, i8* %v34, align 1, !tbaa !8
+  %v36 = zext i8 %v35 to i32
+  %v37 = getelementptr inbounds i8, i8* %v19, i32 6
+  %v38 = load i8, i8* %v37, align 1, !tbaa !8
+  %v39 = zext i8 %v38 to i32
+  %v40 = getelementptr inbounds i8, i8* %v19, i32 7
+  %v41 = load i8, i8* %v40, align 1, !tbaa !8
+  %v42 = zext i8 %v41 to i32
+  %v43 = getelementptr inbounds i8, i8* %v19, i32 8
+  %v44 = load i8, i8* %v43, align 1, !tbaa !8
+  %v45 = zext i8 %v44 to i32
+  %v46 = add nuw nsw i32 %v33, %v24
+  %v47 = mul nsw i32 %v46, -5
+  %v48 = add nuw nsw i32 %v30, %v27
+  %v49 = mul nuw nsw i32 %v48, 20
+  %v50 = add nuw nsw i32 %v49, %v21
+  %v51 = add nuw nsw i32 %v50, %v36
+  %v52 = add nsw i32 %v51, %v47
+  %v53 = trunc i32 %v52 to i16
+  %v54 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 0, i32 %v18
+  store i16 %v53, i16* %v54, align 2, !tbaa !9
+  %v55 = add nuw nsw i32 %v36, %v27
+  %v56 = mul nsw i32 %v55, -5
+  %v57 = add nuw nsw i32 %v33, %v30
+  %v58 = mul nuw nsw i32 %v57, 20
+  %v59 = add nuw nsw i32 %v58, %v24
+  %v60 = add nuw nsw i32 %v59, %v39
+  %v61 = add nsw i32 %v60, %v56
+  %v62 = trunc i32 %v61 to i16
+  %v63 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 1, i32 %v18
+  store i16 %v62, i16* %v63, align 2, !tbaa !9
+  %v64 = add nuw nsw i32 %v39, %v30
+  %v65 = mul nsw i32 %v64, -5
+  %v66 = add nuw nsw i32 %v36, %v33
+  %v67 = mul nuw nsw i32 %v66, 20
+  %v68 = add nuw nsw i32 %v67, %v27
+  %v69 = add nuw nsw i32 %v68, %v42
+  %v70 = add nsw i32 %v69, %v65
+  %v71 = trunc i32 %v70 to i16
+  %v72 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 2, i32 %v18
+  store i16 %v71, i16* %v72, align 2, !tbaa !9
+  %v73 = add nuw nsw i32 %v42, %v33
+  %v74 = mul nsw i32 %v73, -5
+  %v75 = add nuw nsw i32 %v39, %v36
+  %v76 = mul nuw nsw i32 %v75, 20
+  %v77 = add nuw nsw i32 %v76, %v30
+  %v78 = add nuw nsw i32 %v77, %v45
+  %v79 = add nsw i32 %v78, %v74
+  %v80 = trunc i32 %v79 to i16
+  %v81 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 3, i32 %v18
+  store i16 %v80, i16* %v81, align 2, !tbaa !9
+  %v82 = getelementptr inbounds i8, i8* %v19, i32 %a3
+  %v83 = add nuw nsw i32 %v18, 1
+  %v84 = icmp eq i32 %v83, 9
+  br i1 %v84, label %b6, label %b2
+
+b3:                                               ; preds = %b0
+  %v85 = add nsw i32 %a3, -1
+  %v86 = add nsw i32 %a4, -1
+  %v87 = add i32 %a1, 1
+  %v88 = add i32 %a1, 2
+  %v89 = icmp slt i32 %v87, 0
+  %v90 = icmp slt i32 %v87, %a3
+  %v91 = select i1 %v90, i32 %v87, i32 %v85
+  %v92 = select i1 %v89, i32 0, i32 %v91
+  %v93 = add i32 %a1, -1
+  %v94 = icmp slt i32 %v2, 0
+  %v95 = icmp slt i32 %v2, %a3
+  %v96 = select i1 %v95, i32 %v2, i32 %v85
+  %v97 = select i1 %v94, i32 0, i32 %v96
+  %v98 = icmp slt i32 %v93, 0
+  %v99 = icmp slt i32 %v93, %a3
+  %v100 = select i1 %v99, i32 %v93, i32 %v85
+  %v101 = select i1 %v98, i32 0, i32 %v100
+  %v102 = add nsw i32 %a1, 1
+  %v103 = icmp slt i32 %a1, 0
+  %v104 = icmp slt i32 %a1, %a3
+  %v105 = select i1 %v104, i32 %a1, i32 %v85
+  %v106 = select i1 %v103, i32 0, i32 %v105
+  %v107 = add nsw i32 %a1, 2
+  %v108 = icmp slt i32 %a1, -1
+  %v109 = icmp slt i32 %v102, %a3
+  %v110 = select i1 %v109, i32 %v102, i32 %v85
+  %v111 = select i1 %v108, i32 0, i32 %v110
+  %v112 = add nsw i32 %a1, 3
+  %v113 = icmp slt i32 %a1, -2
+  %v114 = icmp slt i32 %v107, %a3
+  %v115 = select i1 %v114, i32 %v107, i32 %v85
+  %v116 = select i1 %v113, i32 0, i32 %v115
+  %v117 = icmp slt i32 %a1, -3
+  %v118 = icmp slt i32 %v112, %a3
+  %v119 = select i1 %v118, i32 %v112, i32 %v85
+  %v120 = select i1 %v117, i32 0, i32 %v119
+  %v121 = add nsw i32 %a1, 4
+  %v122 = icmp slt i32 %a1, -4
+  %v123 = icmp slt i32 %v121, %a3
+  %v124 = select i1 %v123, i32 %v121, i32 %v85
+  %v125 = select i1 %v122, i32 0, i32 %v124
+  %v126 = add nsw i32 %a1, 5
+  %v127 = icmp slt i32 %a1, -5
+  %v128 = icmp slt i32 %v126, %a3
+  %v129 = select i1 %v128, i32 %v126, i32 %v85
+  %v130 = select i1 %v127, i32 0, i32 %v129
+  %v131 = add i32 %a1, 3
+  %v132 = icmp slt i32 %v88, 0
+  %v133 = icmp slt i32 %v88, %a3
+  %v134 = select i1 %v133, i32 %v88, i32 %v85
+  %v135 = select i1 %v132, i32 0, i32 %v134
+  %v136 = add i32 %a1, 4
+  %v137 = icmp slt i32 %v131, 0
+  %v138 = icmp slt i32 %v131, %a3
+  %v139 = select i1 %v138, i32 %v131, i32 %v85
+  %v140 = select i1 %v137, i32 0, i32 %v139
+  %v141 = add i32 %a1, 5
+  %v142 = icmp slt i32 %v136, 0
+  %v143 = icmp slt i32 %v136, %a3
+  %v144 = select i1 %v143, i32 %v136, i32 %v85
+  %v145 = select i1 %v142, i32 0, i32 %v144
+  %v146 = add i32 %a1, 6
+  %v147 = icmp slt i32 %v141, 0
+  %v148 = icmp slt i32 %v141, %a3
+  %v149 = select i1 %v148, i32 %v141, i32 %v85
+  %v150 = select i1 %v147, i32 0, i32 %v149
+  %v151 = icmp slt i32 %v146, 0
+  %v152 = icmp slt i32 %v146, %a3
+  %v153 = select i1 %v152, i32 %v146, i32 %v85
+  %v154 = select i1 %v151, i32 0, i32 %v153
+  br label %b4
+
+b4:                                               ; preds = %b4, %b3
+  %v155 = phi i32 [ 0, %b3 ], [ %v259, %b4 ]
+  %v156 = add i32 %v5, %v155
+  %v157 = icmp slt i32 %v156, 0
+  %v158 = icmp slt i32 %v156, %a4
+  %v159 = select i1 %v158, i32 %v156, i32 %v86
+  %v160 = select i1 %v157, i32 0, i32 %v159
+  %v161 = mul i32 %v160, %a3
+  %v162 = add i32 %v97, %v161
+  %v163 = getelementptr inbounds i8, i8* %v14, i32 %v162
+  %v164 = load i8, i8* %v163, align 1, !tbaa !8
+  %v165 = zext i8 %v164 to i32
+  %v166 = add i32 %v101, %v161
+  %v167 = getelementptr inbounds i8, i8* %v14, i32 %v166
+  %v168 = load i8, i8* %v167, align 1, !tbaa !8
+  %v169 = zext i8 %v168 to i32
+  %v170 = mul nsw i32 %v169, -5
+  %v171 = add nsw i32 %v170, %v165
+  %v172 = add i32 %v106, %v161
+  %v173 = getelementptr inbounds i8, i8* %v14, i32 %v172
+  %v174 = load i8, i8* %v173, align 1, !tbaa !8
+  %v175 = zext i8 %v174 to i32
+  %v176 = mul nuw nsw i32 %v175, 20
+  %v177 = add nsw i32 %v176, %v171
+  %v178 = add i32 %v111, %v161
+  %v179 = getelementptr inbounds i8, i8* %v14, i32 %v178
+  %v180 = load i8, i8* %v179, align 1, !tbaa !8
+  %v181 = zext i8 %v180 to i32
+  %v182 = mul nuw nsw i32 %v181, 20
+  %v183 = add nsw i32 %v182, %v177
+  %v184 = add i32 %v116, %v161
+  %v185 = getelementptr inbounds i8, i8* %v14, i32 %v184
+  %v186 = load i8, i8* %v185, align 1, !tbaa !8
+  %v187 = zext i8 %v186 to i32
+  %v188 = mul nsw i32 %v187, -5
+  %v189 = add nsw i32 %v188, %v183
+  %v190 = add i32 %v120, %v161
+  %v191 = getelementptr inbounds i8, i8* %v14, i32 %v190
+  %v192 = load i8, i8* %v191, align 1, !tbaa !8
+  %v193 = zext i8 %v192 to i32
+  %v194 = add nsw i32 %v189, %v193
+  %v195 = trunc i32 %v194 to i16
+  %v196 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 0, i32 %v155
+  store i16 %v195, i16* %v196, align 2, !tbaa !9
+  %v197 = mul nsw i32 %v175, -5
+  %v198 = add nsw i32 %v197, %v169
+  %v199 = add nsw i32 %v182, %v198
+  %v200 = mul nuw nsw i32 %v187, 20
+  %v201 = add nsw i32 %v200, %v199
+  %v202 = mul nsw i32 %v193, -5
+  %v203 = add nsw i32 %v202, %v201
+  %v204 = add i32 %v125, %v161
+  %v205 = getelementptr inbounds i8, i8* %v14, i32 %v204
+  %v206 = load i8, i8* %v205, align 1, !tbaa !8
+  %v207 = zext i8 %v206 to i32
+  %v208 = add nsw i32 %v203, %v207
+  %v209 = trunc i32 %v208 to i16
+  %v210 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 1, i32 %v155
+  store i16 %v209, i16* %v210, align 2, !tbaa !9
+  %v211 = mul nsw i32 %v181, -5
+  %v212 = add nsw i32 %v211, %v175
+  %v213 = add nsw i32 %v200, %v212
+  %v214 = mul nuw nsw i32 %v193, 20
+  %v215 = add nsw i32 %v214, %v213
+  %v216 = mul nsw i32 %v207, -5
+  %v217 = add nsw i32 %v216, %v215
+  %v218 = add i32 %v130, %v161
+  %v219 = getelementptr inbounds i8, i8* %v14, i32 %v218
+  %v220 = load i8, i8* %v219, align 1, !tbaa !8
+  %v221 = zext i8 %v220 to i32
+  %v222 = add nsw i32 %v217, %v221
+  %v223 = trunc i32 %v222 to i16
+  %v224 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 2, i32 %v155
+  store i16 %v223, i16* %v224, align 2, !tbaa !9
+  %v225 = add i32 %v92, %v161
+  %v226 = getelementptr inbounds i8, i8* %v14, i32 %v225
+  %v227 = load i8, i8* %v226, align 1, !tbaa !8
+  %v228 = zext i8 %v227 to i16
+  %v229 = add i32 %v135, %v161
+  %v230 = getelementptr inbounds i8, i8* %v14, i32 %v229
+  %v231 = load i8, i8* %v230, align 1, !tbaa !8
+  %v232 = zext i8 %v231 to i16
+  %v233 = mul nsw i16 %v232, -5
+  %v234 = add nsw i16 %v233, %v228
+  %v235 = add i32 %v140, %v161
+  %v236 = getelementptr inbounds i8, i8* %v14, i32 %v235
+  %v237 = load i8, i8* %v236, align 1, !tbaa !8
+  %v238 = zext i8 %v237 to i16
+  %v239 = mul nuw nsw i16 %v238, 20
+  %v240 = add nsw i16 %v239, %v234
+  %v241 = add i32 %v145, %v161
+  %v242 = getelementptr inbounds i8, i8* %v14, i32 %v241
+  %v243 = load i8, i8* %v242, align 1, !tbaa !8
+  %v244 = zext i8 %v243 to i16
+  %v245 = mul nuw nsw i16 %v244, 20
+  %v246 = add i16 %v245, %v240
+  %v247 = add i32 %v150, %v161
+  %v248 = getelementptr inbounds i8, i8* %v14, i32 %v247
+  %v249 = load i8, i8* %v248, align 1, !tbaa !8
+  %v250 = zext i8 %v249 to i16
+  %v251 = mul nsw i16 %v250, -5
+  %v252 = add i16 %v251, %v246
+  %v253 = add i32 %v154, %v161
+  %v254 = getelementptr inbounds i8, i8* %v14, i32 %v253
+  %v255 = load i8, i8* %v254, align 1, !tbaa !8
+  %v256 = zext i8 %v255 to i16
+  %v257 = add i16 %v252, %v256
+  %v258 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 3, i32 %v155
+  store i16 %v257, i16* %v258, align 2, !tbaa !9
+  %v259 = add nuw nsw i32 %v155, 1
+  %v260 = icmp eq i32 %v259, 9
+  br i1 %v260, label %b5, label %b4
+
+b5:                                               ; preds = %b4
+  br label %b7
+
+b6:                                               ; preds = %b2
+  br label %b7
+
+b7:                                               ; preds = %b6, %b5
+  br label %b8
+
+b8:                                               ; preds = %b8, %b7
+  %v261 = phi i8* [ %a0, %b7 ], [ %v353, %b8 ]
+  %v262 = phi i32 [ 0, %b7 ], [ %v354, %b8 ]
+  %v263 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 0
+  %v264 = load i16, i16* %v263, align 2, !tbaa !9
+  %v265 = sext i16 %v264 to i32
+  %v266 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 1
+  %v267 = load i16, i16* %v266, align 2, !tbaa !9
+  %v268 = sext i16 %v267 to i32
+  %v269 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 2
+  %v270 = load i16, i16* %v269, align 2, !tbaa !9
+  %v271 = sext i16 %v270 to i32
+  %v272 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 3
+  %v273 = load i16, i16* %v272, align 2, !tbaa !9
+  %v274 = sext i16 %v273 to i32
+  %v275 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 4
+  %v276 = load i16, i16* %v275, align 2, !tbaa !9
+  %v277 = sext i16 %v276 to i32
+  %v278 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 5
+  %v279 = load i16, i16* %v278, align 2, !tbaa !9
+  %v280 = sext i16 %v279 to i32
+  %v281 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 6
+  %v282 = load i16, i16* %v281, align 2, !tbaa !9
+  %v283 = sext i16 %v282 to i32
+  %v284 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 7
+  %v285 = load i16, i16* %v284, align 2, !tbaa !9
+  %v286 = sext i16 %v285 to i32
+  %v287 = getelementptr inbounds [4 x [9 x i16]], [4 x [9 x i16]]* %v0, i32 0, i32 %v262, i32 8
+  %v288 = load i16, i16* %v287, align 2, !tbaa !9
+  %v289 = sext i16 %v288 to i32
+  %v290 = add nsw i32 %v277, %v268
+  %v291 = mul nsw i32 %v290, -5
+  %v292 = add nsw i32 %v274, %v271
+  %v293 = mul nsw i32 %v292, 20
+  %v294 = add nsw i32 %v293, %v265
+  %v295 = add nsw i32 %v294, %v280
+  %v296 = add nsw i32 %v295, %v291
+  %v297 = add nsw i32 %v296, 512
+  %v298 = lshr i32 %v297, 10
+  %v299 = and i32 %v297, 262144
+  %v300 = icmp eq i32 %v299, 0
+  %v301 = icmp slt i32 %v296, -512
+  %v302 = select i1 %v301, i32 0, i32 255
+  %v303 = select i1 %v300, i32 %v298, i32 %v302
+  %v304 = trunc i32 %v303 to i8
+  store i8 %v304, i8* %v261, align 1, !tbaa !8
+  %v305 = add nsw i32 %v280, %v271
+  %v306 = mul nsw i32 %v305, -5
+  %v307 = add nsw i32 %v277, %v274
+  %v308 = mul nsw i32 %v307, 20
+  %v309 = add nsw i32 %v308, %v268
+  %v310 = add nsw i32 %v309, %v283
+  %v311 = add nsw i32 %v310, %v306
+  %v312 = add nsw i32 %v311, 512
+  %v313 = lshr i32 %v312, 10
+  %v314 = and i32 %v312, 262144
+  %v315 = icmp eq i32 %v314, 0
+  %v316 = icmp slt i32 %v311, -512
+  %v317 = select i1 %v316, i32 0, i32 255
+  %v318 = select i1 %v315, i32 %v313, i32 %v317
+  %v319 = trunc i32 %v318 to i8
+  %v320 = getelementptr inbounds i8, i8* %v261, i32 4
+  store i8 %v319, i8* %v320, align 1, !tbaa !8
+  %v321 = add nsw i32 %v283, %v274
+  %v322 = mul nsw i32 %v321, -5
+  %v323 = add nsw i32 %v280, %v277
+  %v324 = mul nsw i32 %v323, 20
+  %v325 = add nsw i32 %v324, %v271
+  %v326 = add nsw i32 %v325, %v286
+  %v327 = add nsw i32 %v326, %v322
+  %v328 = add nsw i32 %v327, 512
+  %v329 = lshr i32 %v328, 10
+  %v330 = and i32 %v328, 262144
+  %v331 = icmp eq i32 %v330, 0
+  %v332 = icmp slt i32 %v327, -512
+  %v333 = select i1 %v332, i32 0, i32 255
+  %v334 = select i1 %v331, i32 %v329, i32 %v333
+  %v335 = trunc i32 %v334 to i8
+  %v336 = getelementptr inbounds i8, i8* %v261, i32 8
+  store i8 %v335, i8* %v336, align 1, !tbaa !8
+  %v337 = add nsw i32 %v286, %v277
+  %v338 = mul nsw i32 %v337, -5
+  %v339 = add nsw i32 %v283, %v280
+  %v340 = mul nsw i32 %v339, 20
+  %v341 = add nsw i32 %v340, %v274
+  %v342 = add nsw i32 %v341, %v289
+  %v343 = add nsw i32 %v342, %v338
+  %v344 = add nsw i32 %v343, 512
+  %v345 = lshr i32 %v344, 10
+  %v346 = and i32 %v344, 262144
+  %v347 = icmp eq i32 %v346, 0
+  %v348 = icmp slt i32 %v343, -512
+  %v349 = select i1 %v348, i32 0, i32 255
+  %v350 = select i1 %v347, i32 %v345, i32 %v349
+  %v351 = trunc i32 %v350 to i8
+  %v352 = getelementptr inbounds i8, i8* %v261, i32 12
+  store i8 %v351, i8* %v352, align 1, !tbaa !8
+  %v353 = getelementptr inbounds i8, i8* %v261, i32 1
+  %v354 = add nuw nsw i32 %v262, 1
+  %v355 = icmp eq i32 %v354, 4
+  br i1 %v355, label %b9, label %b8
+
+b9:                                               ; preds = %b8
+  call void @llvm.lifetime.end.p0i8(i64 72, i8* nonnull %v1) #2
+  ret void
+}
+
+attributes #0 = { argmemonly nounwind }
+attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !2, i64 0}
+!6 = !{!"", !2, i64 0, !7, i64 4, !7, i64 8, !7, i64 12, !7, i64 16}
+!7 = !{!"int", !3, i64 0}
+!8 = !{!3, !3, i64 0}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"short", !3, i64 0}
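As a sanity check of the CHECK line at the top of the test, a sketch under the stated assumptions: the narrowest element type the vectorizer records for the loop is i8 (the loops load i8 values), its calculated MaxVF is 9, and +hvx-length64b gives 512-bit registers.

  unsigned MinVF = (8 * 64) / 8;                  // getMinimumVF(8) == 64
  unsigned MaxVF = 9;                             // calculated by the vectorizer
  unsigned Final = MaxVF < MinVF ? MinVF : MaxVF; // 64, matching the CHECK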