Index: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -616,6 +616,13 @@ if ((TySize % 8) != 0) continue; + // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain + // functions are currently using an integer type for the vectorized + // load/store, and do not support casting between the integer type and a + // vector of pointers (e.g. i64 to <2 x i16*>) + if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy()) + continue; + Value *Ptr = LI->getPointerOperand(); unsigned AS = Ptr->getType()->getPointerAddressSpace(); unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS); @@ -646,6 +653,13 @@ if (!VectorType::isValidElementType(Ty->getScalarType())) continue; + // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain + // functions are currently using an integer type for the vectorized + // load/store, and do not support casting between the integer type and a + // vector of pointers (e.g. i64 to <2 x i16*>) + if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy()) + continue; + // Skip weird non-byte sizes. They probably aren't worth the effort of // handling correctly. unsigned TySize = DL.getTypeSizeInBits(Ty); @@ -701,8 +715,8 @@ SmallVector Heads, Tails; int ConsecutiveChain[64]; - // Do a quadratic search on all of the given stores and find all of the pairs - // of stores that follow each other. + // Do a quadratic search on all of the given loads/stores and find all of the + // pairs of loads/stores that follow each other. for (int i = 0, e = Instrs.size(); i < e; ++i) { ConsecutiveChain[i] = -1; for (int j = e - 1; j >= 0; --j) { @@ -769,7 +783,7 @@ SmallPtrSet *InstructionsProcessed) { StoreInst *S0 = cast(Chain[0]); - // If the vector has an int element, default to int for the whole load. 
+ // If the vector has an int element, default to int for the whole store. Type *StoreTy; for (Instruction *I : Chain) { StoreTy = cast(I)->getValueOperand()->getType(); Index: llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll =================================================================== --- llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll +++ llvm/trunk/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll @@ -632,6 +632,26 @@ ret void } +; Verify that we no longer hit asserts for this test case. No change expected. +; CHECK-LABEL: @copy_vec_of_ptrs +; CHECK: %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1 +; CHECK: %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1 +; CHECK: %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4 +; CHECK: %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1 +; CHECK: store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1 +; CHECK: store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4 +define amdgpu_kernel void @copy_vec_of_ptrs(<2 x i16*> addrspace(1)* %out, + <2 x i16*> addrspace(1)* %in ) #0 { + %in.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %in, i32 1 + %vec1 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in.gep.1 + %vec2 = load <2 x i16*>, <2 x i16*> addrspace(1)* %in, align 4 + + %out.gep.1 = getelementptr <2 x i16*>, <2 x i16*> addrspace(1)* %out, i32 1 + store <2 x i16*> %vec1, <2 x i16*> addrspace(1)* %out.gep.1 + store <2 x i16*> %vec2, <2 x i16*> addrspace(1)* %out, align 4 + ret void +} + declare void @llvm.amdgcn.s.barrier() #1 attributes #0 = { nounwind }