Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -388,6 +388,11 @@
   /// operations, shuffles, or casts.
   bool isFPVectorizationPotentiallyUnsafe() const;
 
+  /// \brief Determine if the target supports unaligned memory accesses.
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,
+                                      unsigned Alignment = 1,
+                                      bool *Fast = nullptr) const;
+
   /// \brief Return hardware support for population count.
   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
 
@@ -653,6 +658,10 @@
   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
   virtual bool enableInterleavedAccessVectorization() = 0;
   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
+  virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                              unsigned AddressSpace,
+                                              unsigned Alignment,
+                                              bool *Fast) = 0;
   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
   virtual bool haveFastSqrt(Type *Ty) = 0;
   virtual int getFPOpCost(Type *Ty) = 0;
@@ -820,6 +829,11 @@
   bool isFPVectorizationPotentiallyUnsafe() override {
     return Impl.isFPVectorizationPotentiallyUnsafe();
   }
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+                                      unsigned Alignment, bool *Fast) override {
+    return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+                                               Alignment, Fast);
+  }
   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
     return Impl.getPopcntSupport(IntTyWidthInBit);
   }
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -244,6 +244,11 @@
 
   bool isFPVectorizationPotentiallyUnsafe() { return false; }
 
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                      unsigned AddressSpace,
+                                      unsigned Alignment,
+                                      bool *Fast) { return false; }
+
   TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
     return TTI::PSK_Software;
   }
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -105,6 +105,11 @@
   /// \name Scalar TTI Implementations
   /// @{
 
+  bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+                                      unsigned Alignment, bool *Fast) const {
+    MVT M = MVT::getIntegerVT(BitWidth);
+    return getTLI()->allowsMisalignedMemoryAccesses(M, AddressSpace, Alignment, Fast);
+  }
 
   bool hasBranchDivergence() { return false; }
 
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -181,6 +181,14 @@
   return TTIImpl->isFPVectorizationPotentiallyUnsafe();
 }
 
+bool TargetTransformInfo::allowsMisalignedMemoryAccesses(unsigned BitWidth,
+                                                         unsigned AddressSpace,
+                                                         unsigned Alignment,
+                                                         bool *Fast) const {
+  return TTIImpl->allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+                                                 Alignment, Fast);
+}
+
 TargetTransformInfo::PopcntSupportKind
 TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
   return TTIImpl->getPopcntSupport(IntTyWidthInBit);
Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -127,6 +127,10 @@
 
   /// Vectorizes the store instructions in Chain.
   bool vectorizeStoreChain(ArrayRef<Value *> Chain);
+
+  /// Check if this load/store access is misaligned.
+  bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
+                          unsigned Alignment);
 };
 
 class LoadStoreVectorizer : public FunctionPass {
@@ -692,18 +696,16 @@
   unsigned Alignment = getAlignment(S0);
 
   // If the store is going to be misaligned, don't vectorize it.
-  // TODO: Check TLI.allowsMisalignedMemoryAccess
-  if ((Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
-    if (S0->getPointerAddressSpace() == 0) {
-      // If we're storing to an object on the stack, we control its alignment,
-      // so we can cheat and change it!
-      Value *V = GetUnderlyingObject(S0->getPointerOperand(), DL);
-      if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
-        AI->setAlignment(TargetBaseAlign);
-        Alignment = TargetBaseAlign;
-      } else {
-        return false;
-      }
+  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
+    if (S0->getPointerAddressSpace() != 0)
+      return false;
+
+    // If we're storing to an object on the stack, we control its alignment,
+    // so we can cheat and change it!
+    Value *V = GetUnderlyingObject(S0->getPointerOperand(), DL);
+    if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
+      AI->setAlignment(TargetBaseAlign);
+      Alignment = TargetBaseAlign;
     } else {
       return false;
     }
@@ -821,18 +823,16 @@
   unsigned Alignment = getAlignment(L0);
 
   // If the load is going to be misaligned, don't vectorize it.
-  // TODO: Check TLI.allowsMisalignedMemoryAccess and remove TargetBaseAlign.
-  if ((Alignment % SzInBytes) != 0 && (Alignment % TargetBaseAlign) != 0) {
-    if (L0->getPointerAddressSpace() == 0) {
-      // If we're loading from an object on the stack, we control its alignment,
-      // so we can cheat and change it!
-      Value *V = GetUnderlyingObject(L0->getPointerOperand(), DL);
-      if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
-        AI->setAlignment(TargetBaseAlign);
-        Alignment = TargetBaseAlign;
-      } else {
-        return false;
-      }
+  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
+    if (L0->getPointerAddressSpace() != 0)
+      return false;
+
+    // If we're loading from an object on the stack, we control its alignment,
+    // so we can cheat and change it!
+    Value *V = GetUnderlyingObject(L0->getPointerOperand(), DL);
+    if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
+      AI->setAlignment(TargetBaseAlign);
+      Alignment = TargetBaseAlign;
     } else {
       return false;
     }
@@ -915,3 +915,13 @@
   NumScalarsVectorized += Chain.size();
   return true;
 }
+
+bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
+                                    unsigned Alignment) {
+  bool Fast = false;
+  bool Allows = TTI.allowsMisalignedMemoryAccesses(SzInBytes * 8, AddressSpace,
+                                                   Alignment, &Fast);
+  // TODO: Remove TargetBaseAlign
+  return !(Allows && Fast) && (Alignment % SzInBytes) != 0 &&
+         (Alignment % TargetBaseAlign) != 0;
+}
Index: test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
===================================================================
--- test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
+++ test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
@@ -19,8 +19,7 @@
 }
 
 ; CHECK-LABEL: @merge_global_store_2_constants_i8_natural_align
-; CHECK: store i8
-; CHECK: store i8
+; CHECK: store <2 x i8>
 define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
 
@@ -50,8 +49,7 @@
 }
 
 ; CHECK-LABEL: @merge_global_store_2_constants_i16_natural_align
-; CHECK: store i16
-; CHECK: store i16
+; CHECK: store <2 x i16>
 define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
 
@@ -61,8 +59,7 @@
 }
 
 ; CHECK-LABEL: @merge_global_store_2_constants_half_natural_align
-; CHECK: store half
-; CHECK: store half
+; CHECK: store <2 x half>
 define void @merge_global_store_2_constants_half_natural_align(half addrspace(1)* %out) #0 {
   %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
 
@@ -432,14 +429,8 @@
 }
 
 ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i8_natural_align
-; CHECK: load i8
-; CHECK: load i8
-; CHECK: load i8
-; CHECK: load i8
-; CHECK: store i8
-; CHECK: store i8
-; CHECK: store i8
-; CHECK: store i8
+; CHECK: load <4 x i8>
+; CHECK: store <4 x i8>
 define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
   %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
   %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
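Note for readers of the patch: the sketch below shows the intended query pattern for the new hook from a client pass, i.e. the Allows/Fast semantics that Vectorizer::accessIsMisaligned relies on. It is only an illustration; the helper name shouldUseWideAccess and its early natural-alignment check are made up for this example and are not part of the patch.

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical helper (not part of this patch): decide whether emitting a
// single wide access of SzInBytes bytes is acceptable at the given alignment.
// Mirrors the logic of Vectorizer::accessIsMisaligned above.
static bool shouldUseWideAccess(const TargetTransformInfo &TTI,
                                unsigned SzInBytes, unsigned AddressSpace,
                                unsigned Alignment) {
  // A naturally aligned access needs no target query.
  if (Alignment % SzInBytes == 0)
    return true;

  // Otherwise ask the target. 'Fast' is an out-parameter: a target may allow
  // the misaligned access (Allows == true) yet still report it as slow, in
  // which case the caller should fall back to narrower accesses.
  bool Fast = false;
  bool Allows = TTI.allowsMisalignedMemoryAccesses(SzInBytes * 8, AddressSpace,
                                                   Alignment, &Fast);
  return Allows && Fast;
}

The defaults follow the layering of the patch: TargetTransformInfoImplBase conservatively answers false, while BasicTTIImpl forwards to TargetLowering::allowsMisalignedMemoryAccesses through getTLI(), so targets that already describe unaligned-access support for codegen (such as the AMDGPU target exercised by the updated merge-stores.ll test) pick up the new TTI hook without extra work.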