Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -388,6 +388,11 @@
   /// operations, shuffles, or casts.
   bool isFPVectorizationPotentiallyUnsafe() const;
 
+  /// \brief Indicate whether the target allows misaligned memory accesses.
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,
+                                      unsigned Alignment = 1,
+                                      bool *Fast = nullptr) const;
+
   /// \brief Return hardware support for population count.
   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
 
@@ -653,6 +658,10 @@
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
+  virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                              unsigned AddressSpace,
+                                              unsigned Alignment,
+                                              bool *Fast) = 0;
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
   virtual bool haveFastSqrt(Type *Ty) = 0;
   virtual int getFPOpCost(Type *Ty) = 0;
@@ -820,6 +829,11 @@
   bool isFPVectorizationPotentiallyUnsafe() override {
     return Impl.isFPVectorizationPotentiallyUnsafe();
   }
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+                                      unsigned Alignment, bool *Fast) override {
+    return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+                                               Alignment, Fast);
+  }
   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
     return Impl.getPopcntSupport(IntTyWidthInBit);
   }
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -244,6 +244,11 @@
   bool isFPVectorizationPotentiallyUnsafe() { return false; }
 
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                      unsigned AddressSpace,
+                                      unsigned Alignment,
+                                      bool *Fast) { return false; }
+
   TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
     return TTI::PSK_Software;
   }
 
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -105,6 +105,11 @@
   /// \name Scalar TTI Implementations
   /// @{
 
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+                                      unsigned Alignment, bool *Fast) const {
+    MVT M = MVT::getIntegerVT(BitWidth);
+    return getTLI()->allowsMisalignedMemoryAccesses(M, AddressSpace, Alignment, Fast);
+  }
 
   bool hasBranchDivergence() { return false; }
 
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -181,6 +181,14 @@
   return TTIImpl->isFPVectorizationPotentiallyUnsafe();
 }
 
+bool TargetTransformInfo::allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                                         unsigned AddressSpace,
+                                                         unsigned Alignment,
+                                                         bool *Fast) const {
+  return TTIImpl->allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+                                                 Alignment, Fast);
+}
+
 TargetTransformInfo::PopcntSupportKind
 TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
   return TTIImpl->getPopcntSupport(IntTyWidthInBit);
Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -128,6 +128,10 @@
 
   /// Vectorizes the store instructions in Chain.
   bool vectorizeStoreChain(ArrayRef<Value *> Chain);
+
+  /// Query the target for allowed misaligned accesses.
+  bool allowsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
+                        unsigned Alignment, bool *Fast = nullptr);
 };
 
 class LoadStoreVectorizer : public FunctionPass {
@@ -695,8 +699,10 @@
 
   unsigned Alignment = getAlignment(S0);
   // If the store is going to be misaligned, don't vectorize it.
-  // TODO: Check TLI.allowsMisalignedMemoryAccess
-  if ((Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
+  // TODO: Remove TargetBaseAlign.
+  bool Fast = false;
+  if (!(allowsMisaligned(SzInBytes, AS, Alignment, &Fast) && Fast) &&
+      (Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
     if (S0->getPointerAddressSpace() == 0) {
       // If we're storing to an object on the stack, we control its alignment,
       // so we can cheat and change it!
@@ -823,8 +829,10 @@
 
   unsigned Alignment = getAlignment(L0);
   // If the load is going to be misaligned, don't vectorize it.
-  // TODO: Check TLI.allowsMisalignedMemoryAccess and remove TargetBaseAlign.
-  if ((Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
+  // TODO: Remove TargetBaseAlign.
+  bool Fast = false;
+  if (!(allowsMisaligned(SzInBytes, AS, Alignment, &Fast) && Fast) &&
+      (Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
     if (L0->getPointerAddressSpace() == 0) {
       // If we're loading from an object on the stack, we control its alignment,
       // so we can cheat and change it!
@@ -917,3 +925,10 @@
   NumScalarsVectorized += Chain.size();
   return true;
 }
+
+bool Vectorizer::allowsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
+                                  unsigned Alignment, bool *Fast) {
+  return TTI.allowsMisalignedMemoryAccesses(SzInBytes * 8, AddressSpace,
+                                            Alignment, Fast);
+}
+
Index: test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
===================================================================
--- test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
+++ test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
@@ -502,8 +502,7 @@
 }
 
 ; CHECK-LABEL: @merge_local_store_2_constants_i32_align_2
-; CHECK: store i32
-; CHECK: store i32
+; CHECK: store <2 x i32> , <2 x i32> addrspace(3)* %1, align 2
 define void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) #0 {
   %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
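
Usage note (not part of the patch): a minimal sketch of how a client is expected
to combine the new hook's return value with its Fast out-parameter, matching the
intent of the LoadStoreVectorizer change above. The helper name
canUseMisalignedAccess and its signature are illustrative assumptions, not code
from this change.

  #include "llvm/Analysis/TargetTransformInfo.h"

  // Sketch only: accept a misaligned access of SzInBytes bytes only when the
  // target both allows it and reports that it is fast. Fast is initialized
  // because a target may return false without writing to the out-parameter.
  static bool canUseMisalignedAccess(const llvm::TargetTransformInfo &TTI,
                                     unsigned SzInBytes, unsigned AddrSpace,
                                     unsigned Alignment) {
    bool Fast = false;
    bool Allows = TTI.allowsMisalignedMemoryAccesses(SzInBytes * 8, AddrSpace,
                                                     Alignment, &Fast);
    return Allows && Fast;
  }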