Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -737,9 +737,18 @@ ArrayRef NewChain = getVectorizablePrefix(Chain); if (NewChain.empty()) { - // No vectorization possible. - InstructionsProcessed->insert(Chain.begin(), Chain.end()); - return false; + // No vectorization possible if the number of instructions + // is less than or equal to VF. + if (ChainSize <= VF) { + InstructionsProcessed->insert(Chain.begin(), Chain.end()); + return false; + } + // Split the chain and try each slice separately in order + // to increase the number of vector instructions generated. + ArrayRef Left = Chain.slice(0, VF); + ArrayRef Right = Chain.slice(VF); + return vectorizeStoreChain(Left, InstructionsProcessed) | + vectorizeStoreChain(Right, InstructionsProcessed); } if (NewChain.size() == 1) { // Failed after the first instruction. Discard it and try the smaller chain. @@ -885,9 +894,18 @@ ArrayRef NewChain = getVectorizablePrefix(Chain); if (NewChain.empty()) { - // No vectorization possible. - InstructionsProcessed->insert(Chain.begin(), Chain.end()); - return false; + // No vectorization possible if the number of instructions + // is less than or equal to VF. + if (ChainSize <= VF) { + InstructionsProcessed->insert(Chain.begin(), Chain.end()); + return false; + } + // Split the chain and try each slice separately in order + // to increase the number of vector instructions generated. + ArrayRef Left = Chain.slice(0, VF); + ArrayRef Right = Chain.slice(VF); + return vectorizeLoadChain(Left, InstructionsProcessed) | + vectorizeLoadChain(Right, InstructionsProcessed); } if (NewChain.size() == 1) { // Failed after the first instruction. Discard it and try the smaller chain. 
Index: test/Transforms/LoadStoreVectorizer/AMDGPU/empty-prefix.ll =================================================================== --- /dev/null +++ test/Transforms/LoadStoreVectorizer/AMDGPU/empty-prefix.ll @@ -0,0 +1,330 @@ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s + +; Split the chain and try again if the vectorizable +; prefix is empty. It will vectorize 48 more scalars. + +; CHECK-LABEL: foo +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: load <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> +; CHECK: store <4 x float> + +; Function Attrs: nounwind +define void @foo(i8 addrspace(1)* %a, i8 addrspace(1)* %b, i64 %n) #0 { +bb: + %tmp = bitcast i8 addrspace(1)* %b to float addrspace(1)* + %tmp1 = load float, float addrspace(1)* %tmp, align 4 + %tmp2 = getelementptr i8, i8 addrspace(1)* %b, i64 4 + %tmp3 = bitcast i8 addrspace(1)* %tmp2 to float addrspace(1)* + %tmp4 = load float, float addrspace(1)* %tmp3, align 4 + %tmp5 = getelementptr i8, i8 addrspace(1)* %b, i64 8 + %tmp6 = bitcast i8 addrspace(1)* %tmp5 to float addrspace(1)* + %tmp7 = load float, float addrspace(1)* %tmp6, align 4 + %tmp8 = getelementptr i8, i8 addrspace(1)* %b, i64 12 + %tmp9 = bitcast i8 addrspace(1)* %tmp8 to float addrspace(1)* + %tmp10 = load float, float addrspace(1)* %tmp9, align 4 + %tmp11 = getelementptr i8, i8 addrspace(1)* %b, i64 16 + %tmp12 = bitcast i8 addrspace(1)* %tmp11 to float addrspace(1)* + %tmp13 = 
load float, float addrspace(1)* %tmp12, align 4 + %tmp14 = getelementptr i8, i8 addrspace(1)* %b, i64 20 + %tmp15 = bitcast i8 addrspace(1)* %tmp14 to float addrspace(1)* + %tmp16 = load float, float addrspace(1)* %tmp15, align 4 + %tmp17 = getelementptr i8, i8 addrspace(1)* %b, i64 24 + %tmp18 = bitcast i8 addrspace(1)* %tmp17 to float addrspace(1)* + %tmp19 = load float, float addrspace(1)* %tmp18, align 4 + %tmp20 = getelementptr i8, i8 addrspace(1)* %b, i64 28 + %tmp21 = bitcast i8 addrspace(1)* %tmp20 to float addrspace(1)* + %tmp22 = load float, float addrspace(1)* %tmp21, align 4 + %tmp23 = getelementptr i8, i8 addrspace(1)* %b, i64 32 + %tmp24 = bitcast i8 addrspace(1)* %tmp23 to float addrspace(1)* + %tmp25 = load float, float addrspace(1)* %tmp24, align 4 + %tmp26 = getelementptr i8, i8 addrspace(1)* %b, i64 36 + %tmp27 = bitcast i8 addrspace(1)* %tmp26 to float addrspace(1)* + %tmp28 = load float, float addrspace(1)* %tmp27, align 4 + %tmp29 = getelementptr i8, i8 addrspace(1)* %b, i64 40 + %tmp30 = bitcast i8 addrspace(1)* %tmp29 to float addrspace(1)* + %tmp31 = load float, float addrspace(1)* %tmp30, align 4 + %tmp32 = getelementptr i8, i8 addrspace(1)* %b, i64 44 + %tmp33 = bitcast i8 addrspace(1)* %tmp32 to float addrspace(1)* + %tmp34 = load float, float addrspace(1)* %tmp33, align 4 + %tmp35 = getelementptr i8, i8 addrspace(1)* %b, i64 48 + %tmp36 = bitcast i8 addrspace(1)* %tmp35 to float addrspace(1)* + %tmp37 = load float, float addrspace(1)* %tmp36, align 4 + %tmp38 = getelementptr i8, i8 addrspace(1)* %b, i64 52 + %tmp39 = bitcast i8 addrspace(1)* %tmp38 to float addrspace(1)* + %tmp40 = load float, float addrspace(1)* %tmp39, align 4 + %tmp41 = getelementptr i8, i8 addrspace(1)* %b, i64 56 + %tmp42 = bitcast i8 addrspace(1)* %tmp41 to float addrspace(1)* + %tmp43 = load float, float addrspace(1)* %tmp42, align 4 + %tmp44 = getelementptr i8, i8 addrspace(1)* %b, i64 60 + %tmp45 = bitcast i8 addrspace(1)* %tmp44 to float addrspace(1)* + %tmp46 
= load float, float addrspace(1)* %tmp45, align 4 + %tmp47 = getelementptr i8, i8 addrspace(1)* %b, i64 64 + %tmp48 = bitcast i8 addrspace(1)* %tmp47 to float addrspace(1)* + %tmp49 = load float, float addrspace(1)* %tmp48, align 4 + %tmp50 = getelementptr i8, i8 addrspace(1)* %b, i64 68 + %tmp51 = bitcast i8 addrspace(1)* %tmp50 to float addrspace(1)* + %tmp52 = load float, float addrspace(1)* %tmp51, align 4 + %tmp53 = getelementptr i8, i8 addrspace(1)* %b, i64 72 + %tmp54 = bitcast i8 addrspace(1)* %tmp53 to float addrspace(1)* + %tmp55 = load float, float addrspace(1)* %tmp54, align 4 + %tmp56 = getelementptr i8, i8 addrspace(1)* %b, i64 76 + %tmp57 = bitcast i8 addrspace(1)* %tmp56 to float addrspace(1)* + %tmp58 = load float, float addrspace(1)* %tmp57, align 4 + %tmp59 = getelementptr i8, i8 addrspace(1)* %b, i64 80 + %tmp60 = bitcast i8 addrspace(1)* %tmp59 to float addrspace(1)* + %tmp61 = load float, float addrspace(1)* %tmp60, align 4 + %tmp62 = getelementptr i8, i8 addrspace(1)* %b, i64 84 + %tmp63 = bitcast i8 addrspace(1)* %tmp62 to float addrspace(1)* + %tmp64 = load float, float addrspace(1)* %tmp63, align 4 + %tmp65 = getelementptr i8, i8 addrspace(1)* %b, i64 88 + %tmp66 = bitcast i8 addrspace(1)* %tmp65 to float addrspace(1)* + %tmp67 = load float, float addrspace(1)* %tmp66, align 4 + %tmp68 = getelementptr i8, i8 addrspace(1)* %b, i64 92 + %tmp69 = bitcast i8 addrspace(1)* %tmp68 to float addrspace(1)* + %tmp70 = load float, float addrspace(1)* %tmp69, align 4 + %tmp71 = getelementptr i8, i8 addrspace(1)* %b, i64 96 + %tmp72 = bitcast i8 addrspace(1)* %tmp71 to float addrspace(1)* + %tmp73 = load float, float addrspace(1)* %tmp72, align 4 + %tmp74 = getelementptr i8, i8 addrspace(1)* %b, i64 100 + %tmp75 = bitcast i8 addrspace(1)* %tmp74 to float addrspace(1)* + %tmp76 = load float, float addrspace(1)* %tmp75, align 4 + %tmp77 = getelementptr i8, i8 addrspace(1)* %b, i64 104 + %tmp78 = bitcast i8 addrspace(1)* %tmp77 to float addrspace(1)* + 
%tmp79 = load float, float addrspace(1)* %tmp78, align 4 + %tmp80 = getelementptr i8, i8 addrspace(1)* %b, i64 108 + %tmp81 = bitcast i8 addrspace(1)* %tmp80 to float addrspace(1)* + %tmp82 = load float, float addrspace(1)* %tmp81, align 4 + %tmp83 = getelementptr i8, i8 addrspace(1)* %b, i64 112 + %tmp84 = bitcast i8 addrspace(1)* %tmp83 to float addrspace(1)* + %tmp85 = load float, float addrspace(1)* %tmp84, align 4 + %tmp86 = getelementptr i8, i8 addrspace(1)* %b, i64 116 + %tmp87 = bitcast i8 addrspace(1)* %tmp86 to float addrspace(1)* + %tmp88 = load float, float addrspace(1)* %tmp87, align 4 + %tmp89 = getelementptr i8, i8 addrspace(1)* %b, i64 120 + %tmp90 = bitcast i8 addrspace(1)* %tmp89 to float addrspace(1)* + %tmp91 = load float, float addrspace(1)* %tmp90, align 4 + %tmp92 = getelementptr i8, i8 addrspace(1)* %b, i64 124 + %tmp93 = bitcast i8 addrspace(1)* %tmp92 to float addrspace(1)* + %tmp94 = load float, float addrspace(1)* %tmp93, align 4 + %tmp95 = bitcast i8 addrspace(1)* %a to float addrspace(1)* + store float %tmp1, float addrspace(1)* %tmp95, align 4 + %tmp96 = getelementptr i8, i8 addrspace(1)* %a, i64 4 + %tmp97 = bitcast i8 addrspace(1)* %tmp96 to float addrspace(1)* + store float %tmp4, float addrspace(1)* %tmp97, align 4 + %tmp98 = getelementptr i8, i8 addrspace(1)* %a, i64 8 + %tmp99 = bitcast i8 addrspace(1)* %tmp98 to float addrspace(1)* + store float %tmp7, float addrspace(1)* %tmp99, align 4 + %tmp100 = getelementptr i8, i8 addrspace(1)* %a, i64 12 + %tmp101 = bitcast i8 addrspace(1)* %tmp100 to float addrspace(1)* + store float %tmp10, float addrspace(1)* %tmp101, align 4 + %tmp102 = getelementptr i8, i8 addrspace(1)* %a, i64 16 + %tmp103 = bitcast i8 addrspace(1)* %tmp102 to float addrspace(1)* + store float %tmp13, float addrspace(1)* %tmp103, align 4 + %tmp104 = getelementptr i8, i8 addrspace(1)* %a, i64 20 + %tmp105 = bitcast i8 addrspace(1)* %tmp104 to float addrspace(1)* + store float %tmp16, float addrspace(1)* %tmp105, 
align 4 + %tmp106 = getelementptr i8, i8 addrspace(1)* %a, i64 24 + %tmp107 = bitcast i8 addrspace(1)* %tmp106 to float addrspace(1)* + store float %tmp19, float addrspace(1)* %tmp107, align 4 + %tmp108 = getelementptr i8, i8 addrspace(1)* %a, i64 28 + %tmp109 = bitcast i8 addrspace(1)* %tmp108 to float addrspace(1)* + store float %tmp22, float addrspace(1)* %tmp109, align 4 + %tmp110 = getelementptr i8, i8 addrspace(1)* %a, i64 32 + %tmp111 = bitcast i8 addrspace(1)* %tmp110 to float addrspace(1)* + store float %tmp25, float addrspace(1)* %tmp111, align 4 + %tmp112 = getelementptr i8, i8 addrspace(1)* %a, i64 36 + %tmp113 = bitcast i8 addrspace(1)* %tmp112 to float addrspace(1)* + store float %tmp28, float addrspace(1)* %tmp113, align 4 + %tmp114 = getelementptr i8, i8 addrspace(1)* %a, i64 40 + %tmp115 = bitcast i8 addrspace(1)* %tmp114 to float addrspace(1)* + store float %tmp31, float addrspace(1)* %tmp115, align 4 + %tmp116 = getelementptr i8, i8 addrspace(1)* %a, i64 44 + %tmp117 = bitcast i8 addrspace(1)* %tmp116 to float addrspace(1)* + store float %tmp34, float addrspace(1)* %tmp117, align 4 + %tmp118 = getelementptr i8, i8 addrspace(1)* %a, i64 48 + %tmp119 = bitcast i8 addrspace(1)* %tmp118 to float addrspace(1)* + store float %tmp37, float addrspace(1)* %tmp119, align 4 + %tmp120 = getelementptr i8, i8 addrspace(1)* %a, i64 52 + %tmp121 = bitcast i8 addrspace(1)* %tmp120 to float addrspace(1)* + store float %tmp40, float addrspace(1)* %tmp121, align 4 + %tmp122 = getelementptr i8, i8 addrspace(1)* %a, i64 56 + %tmp123 = bitcast i8 addrspace(1)* %tmp122 to float addrspace(1)* + store float %tmp43, float addrspace(1)* %tmp123, align 4 + %tmp124 = getelementptr i8, i8 addrspace(1)* %a, i64 60 + %tmp125 = bitcast i8 addrspace(1)* %tmp124 to float addrspace(1)* + store float %tmp46, float addrspace(1)* %tmp125, align 4 + %tmp126 = getelementptr i8, i8 addrspace(1)* %a, i64 64 + %tmp127 = bitcast i8 addrspace(1)* %tmp126 to float addrspace(1)* + store float 
%tmp49, float addrspace(1)* %tmp127, align 4 + %tmp128 = getelementptr i8, i8 addrspace(1)* %a, i64 68 + %tmp129 = bitcast i8 addrspace(1)* %tmp128 to float addrspace(1)* + store float %tmp52, float addrspace(1)* %tmp129, align 4 + %tmp130 = getelementptr i8, i8 addrspace(1)* %a, i64 72 + %tmp131 = bitcast i8 addrspace(1)* %tmp130 to float addrspace(1)* + store float %tmp55, float addrspace(1)* %tmp131, align 4 + %tmp132 = getelementptr i8, i8 addrspace(1)* %a, i64 76 + %tmp133 = bitcast i8 addrspace(1)* %tmp132 to float addrspace(1)* + store float %tmp58, float addrspace(1)* %tmp133, align 4 + %tmp134 = getelementptr i8, i8 addrspace(1)* %a, i64 80 + %tmp135 = bitcast i8 addrspace(1)* %tmp134 to float addrspace(1)* + store float %tmp61, float addrspace(1)* %tmp135, align 4 + %tmp136 = getelementptr i8, i8 addrspace(1)* %a, i64 84 + %tmp137 = bitcast i8 addrspace(1)* %tmp136 to float addrspace(1)* + store float %tmp64, float addrspace(1)* %tmp137, align 4 + %tmp138 = getelementptr i8, i8 addrspace(1)* %a, i64 88 + %tmp139 = bitcast i8 addrspace(1)* %tmp138 to float addrspace(1)* + store float %tmp67, float addrspace(1)* %tmp139, align 4 + %tmp140 = getelementptr i8, i8 addrspace(1)* %a, i64 92 + %tmp141 = bitcast i8 addrspace(1)* %tmp140 to float addrspace(1)* + store float %tmp70, float addrspace(1)* %tmp141, align 4 + %tmp142 = getelementptr i8, i8 addrspace(1)* %a, i64 96 + %tmp143 = bitcast i8 addrspace(1)* %tmp142 to float addrspace(1)* + store float %tmp73, float addrspace(1)* %tmp143, align 4 + %tmp144 = getelementptr i8, i8 addrspace(1)* %a, i64 100 + %tmp145 = bitcast i8 addrspace(1)* %tmp144 to float addrspace(1)* + store float %tmp76, float addrspace(1)* %tmp145, align 4 + %tmp146 = getelementptr i8, i8 addrspace(1)* %a, i64 104 + %tmp147 = bitcast i8 addrspace(1)* %tmp146 to float addrspace(1)* + store float %tmp79, float addrspace(1)* %tmp147, align 4 + %tmp148 = getelementptr i8, i8 addrspace(1)* %a, i64 108 + %tmp149 = bitcast i8 addrspace(1)* 
%tmp148 to float addrspace(1)* + store float %tmp82, float addrspace(1)* %tmp149, align 4 + %tmp150 = getelementptr i8, i8 addrspace(1)* %a, i64 112 + %tmp151 = bitcast i8 addrspace(1)* %tmp150 to float addrspace(1)* + store float %tmp85, float addrspace(1)* %tmp151, align 4 + %tmp152 = getelementptr i8, i8 addrspace(1)* %a, i64 116 + %tmp153 = bitcast i8 addrspace(1)* %tmp152 to float addrspace(1)* + store float %tmp88, float addrspace(1)* %tmp153, align 4 + %tmp154 = getelementptr i8, i8 addrspace(1)* %a, i64 120 + %tmp155 = bitcast i8 addrspace(1)* %tmp154 to float addrspace(1)* + store float %tmp91, float addrspace(1)* %tmp155, align 4 + %tmp156 = getelementptr i8, i8 addrspace(1)* %a, i64 124 + %tmp157 = bitcast i8 addrspace(1)* %tmp156 to float addrspace(1)* + store float %tmp94, float addrspace(1)* %tmp157, align 4 + %tmp158 = getelementptr i8, i8 addrspace(1)* %b, i64 128 + %tmp159 = bitcast i8 addrspace(1)* %tmp158 to float addrspace(1)* + %tmp160 = load float, float addrspace(1)* %tmp159, align 4 + %tmp161 = getelementptr i8, i8 addrspace(1)* %b, i64 132 + %tmp162 = bitcast i8 addrspace(1)* %tmp161 to float addrspace(1)* + %tmp163 = load float, float addrspace(1)* %tmp162, align 4 + %tmp164 = getelementptr i8, i8 addrspace(1)* %b, i64 136 + %tmp165 = bitcast i8 addrspace(1)* %tmp164 to float addrspace(1)* + %tmp166 = load float, float addrspace(1)* %tmp165, align 4 + %tmp167 = getelementptr i8, i8 addrspace(1)* %b, i64 140 + %tmp168 = bitcast i8 addrspace(1)* %tmp167 to float addrspace(1)* + %tmp169 = load float, float addrspace(1)* %tmp168, align 4 + %tmp170 = getelementptr i8, i8 addrspace(1)* %b, i64 144 + %tmp171 = bitcast i8 addrspace(1)* %tmp170 to float addrspace(1)* + %tmp172 = load float, float addrspace(1)* %tmp171, align 4 + %tmp173 = getelementptr i8, i8 addrspace(1)* %b, i64 148 + %tmp174 = bitcast i8 addrspace(1)* %tmp173 to float addrspace(1)* + %tmp175 = load float, float addrspace(1)* %tmp174, align 4 + %tmp176 = getelementptr i8, i8 
addrspace(1)* %b, i64 152 + %tmp177 = bitcast i8 addrspace(1)* %tmp176 to float addrspace(1)* + %tmp178 = load float, float addrspace(1)* %tmp177, align 4 + %tmp179 = getelementptr i8, i8 addrspace(1)* %b, i64 156 + %tmp180 = bitcast i8 addrspace(1)* %tmp179 to float addrspace(1)* + %tmp181 = load float, float addrspace(1)* %tmp180, align 4 + %tmp182 = getelementptr i8, i8 addrspace(1)* %b, i64 160 + %tmp183 = bitcast i8 addrspace(1)* %tmp182 to float addrspace(1)* + %tmp184 = load float, float addrspace(1)* %tmp183, align 4 + %tmp185 = getelementptr i8, i8 addrspace(1)* %b, i64 164 + %tmp186 = bitcast i8 addrspace(1)* %tmp185 to float addrspace(1)* + %tmp187 = load float, float addrspace(1)* %tmp186, align 4 + %tmp188 = getelementptr i8, i8 addrspace(1)* %b, i64 168 + %tmp189 = bitcast i8 addrspace(1)* %tmp188 to float addrspace(1)* + %tmp190 = load float, float addrspace(1)* %tmp189, align 4 + %tmp191 = getelementptr i8, i8 addrspace(1)* %b, i64 172 + %tmp192 = bitcast i8 addrspace(1)* %tmp191 to float addrspace(1)* + %tmp193 = load float, float addrspace(1)* %tmp192, align 4 + %tmp194 = getelementptr i8, i8 addrspace(1)* %b, i64 176 + %tmp195 = bitcast i8 addrspace(1)* %tmp194 to float addrspace(1)* + %tmp196 = load float, float addrspace(1)* %tmp195, align 4 + %tmp197 = getelementptr i8, i8 addrspace(1)* %b, i64 180 + %tmp198 = bitcast i8 addrspace(1)* %tmp197 to float addrspace(1)* + %tmp199 = load float, float addrspace(1)* %tmp198, align 4 + %tmp200 = getelementptr i8, i8 addrspace(1)* %b, i64 184 + %tmp201 = bitcast i8 addrspace(1)* %tmp200 to float addrspace(1)* + %tmp202 = load float, float addrspace(1)* %tmp201, align 4 + %tmp203 = getelementptr i8, i8 addrspace(1)* %b, i64 188 + %tmp204 = bitcast i8 addrspace(1)* %tmp203 to float addrspace(1)* + %tmp205 = load float, float addrspace(1)* %tmp204, align 4 + %tmp206 = getelementptr i8, i8 addrspace(1)* %b, i64 192 + %tmp207 = bitcast i8 addrspace(1)* %tmp206 to half addrspace(1)* + %tmp208 = load half, 
half addrspace(1)* %tmp207, align 2 + %tmp209 = getelementptr i8, i8 addrspace(1)* %a, i64 128 + %tmp210 = bitcast i8 addrspace(1)* %tmp209 to float addrspace(1)* + store float %tmp160, float addrspace(1)* %tmp210, align 4 + %tmp211 = getelementptr i8, i8 addrspace(1)* %a, i64 132 + %tmp212 = bitcast i8 addrspace(1)* %tmp211 to float addrspace(1)* + store float %tmp163, float addrspace(1)* %tmp212, align 4 + %tmp213 = getelementptr i8, i8 addrspace(1)* %a, i64 136 + %tmp214 = bitcast i8 addrspace(1)* %tmp213 to float addrspace(1)* + store float %tmp166, float addrspace(1)* %tmp214, align 4 + %tmp215 = getelementptr i8, i8 addrspace(1)* %a, i64 140 + %tmp216 = bitcast i8 addrspace(1)* %tmp215 to float addrspace(1)* + store float %tmp169, float addrspace(1)* %tmp216, align 4 + %tmp217 = getelementptr i8, i8 addrspace(1)* %a, i64 144 + %tmp218 = bitcast i8 addrspace(1)* %tmp217 to float addrspace(1)* + store float %tmp172, float addrspace(1)* %tmp218, align 4 + %tmp219 = getelementptr i8, i8 addrspace(1)* %a, i64 148 + %tmp220 = bitcast i8 addrspace(1)* %tmp219 to float addrspace(1)* + store float %tmp175, float addrspace(1)* %tmp220, align 4 + %tmp221 = getelementptr i8, i8 addrspace(1)* %a, i64 152 + %tmp222 = bitcast i8 addrspace(1)* %tmp221 to float addrspace(1)* + store float %tmp178, float addrspace(1)* %tmp222, align 4 + %tmp223 = getelementptr i8, i8 addrspace(1)* %a, i64 156 + %tmp224 = bitcast i8 addrspace(1)* %tmp223 to float addrspace(1)* + store float %tmp181, float addrspace(1)* %tmp224, align 4 + %tmp225 = getelementptr i8, i8 addrspace(1)* %a, i64 160 + %tmp226 = bitcast i8 addrspace(1)* %tmp225 to float addrspace(1)* + store float %tmp184, float addrspace(1)* %tmp226, align 4 + %tmp227 = getelementptr i8, i8 addrspace(1)* %a, i64 164 + %tmp228 = bitcast i8 addrspace(1)* %tmp227 to float addrspace(1)* + store float %tmp187, float addrspace(1)* %tmp228, align 4 + %tmp229 = getelementptr i8, i8 addrspace(1)* %a, i64 168 + %tmp230 = bitcast i8 
addrspace(1)* %tmp229 to float addrspace(1)* + store float %tmp190, float addrspace(1)* %tmp230, align 4 + %tmp231 = getelementptr i8, i8 addrspace(1)* %a, i64 172 + %tmp232 = bitcast i8 addrspace(1)* %tmp231 to float addrspace(1)* + store float %tmp193, float addrspace(1)* %tmp232, align 4 + %tmp233 = getelementptr i8, i8 addrspace(1)* %a, i64 176 + %tmp234 = bitcast i8 addrspace(1)* %tmp233 to float addrspace(1)* + store float %tmp196, float addrspace(1)* %tmp234, align 4 + %tmp235 = getelementptr i8, i8 addrspace(1)* %a, i64 180 + %tmp236 = bitcast i8 addrspace(1)* %tmp235 to float addrspace(1)* + store float %tmp199, float addrspace(1)* %tmp236, align 4 + %tmp237 = getelementptr i8, i8 addrspace(1)* %a, i64 184 + %tmp238 = bitcast i8 addrspace(1)* %tmp237 to float addrspace(1)* + store float %tmp202, float addrspace(1)* %tmp238, align 4 + %tmp239 = getelementptr i8, i8 addrspace(1)* %a, i64 188 + %tmp240 = bitcast i8 addrspace(1)* %tmp239 to float addrspace(1)* + store float %tmp205, float addrspace(1)* %tmp240, align 4 + %tmp241 = getelementptr i8, i8 addrspace(1)* %a, i64 192 + %tmp242 = bitcast i8 addrspace(1)* %tmp241 to half addrspace(1)* + store half %tmp208, half addrspace(1)* %tmp242, align 2 + ret void +} + +attributes #0 = { argmemonly nounwind }