diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp --- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -82,6 +82,7 @@ int getSizeOf(const Value *Val) const; int getSizeOf(const Type *Ty) const; + int getAllocSizeOf(const Type *Ty) const; int getTypeAlignment(Type *Ty) const; VectorType *getByteVectorTy(int ScLen) const; @@ -443,8 +444,8 @@ auto *PtrTy = cast(Ptr->getType()); if (!PtrTy->isOpaque()) { Type *ElemTy = PtrTy->getElementType(); - int ElemSize = HVC.getSizeOf(ElemTy); - if (Adjust % ElemSize == 0) { + int ElemSize = HVC.getAllocSizeOf(ElemTy); + if (Adjust % ElemSize == 0 && Adjust != 0) { Value *Tmp0 = Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize)); return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo()); @@ -979,6 +980,10 @@ return DL.getTypeStoreSize(const_cast(Ty)).getFixedValue(); } +auto HexagonVectorCombine::getAllocSizeOf(const Type *Ty) const -> int { + return DL.getTypeAllocSize(const_cast(Ty)).getFixedValue(); +} + auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int { // The actual type may be shorter than the HVX vector, so determine // the alignment based on subtarget info. @@ -1326,7 +1331,7 @@ return None; Builder B(Gep0->getParent()); - int Scale = DL.getTypeStoreSize(Gep0->getSourceElementType()); + int Scale = getAllocSizeOf(Gep0->getSourceElementType()); // FIXME: for now only check GEPs with a single index. 
if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2) diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-addr.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-addr.ll --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-addr.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s + +; Test that the Hexagon Vector Combine pass computes the address +; correctly when loading objects that contain extra padding +; between successive objects. + +; CHECK: [[REG:r[0-9]+]] = add(r{{[0-9]+}},#2432) +; CHECK: = vmem([[REG]]+#0) + +define dllexport void @test(i8* %0) local_unnamed_addr #0 { +entry: + %1 = add nuw nsw i32 0, 3040 + %2 = load i8, i8* undef, align 1 + %3 = insertelement <19 x i8> undef, i8 %2, i32 0 + %4 = shufflevector <19 x i8> %3, <19 x i8> undef, <19 x i32> zeroinitializer + %5 = getelementptr inbounds i8, i8* %0, i32 %1 + %6 = bitcast i8* %5 to <19 x i8>* + %7 = load <19 x i8>, <19 x i8>* %6, align 1 + %8 = mul <19 x i8> %4, %7 + %9 = add <19 x i8> %8, zeroinitializer + %10 = add <19 x i8> zeroinitializer, %9 + %11 = add <19 x i8> zeroinitializer, %10 + %12 = add <19 x i8> zeroinitializer, %11 + %13 = add <19 x i8> zeroinitializer, %12 + %14 = add <19 x i8> zeroinitializer, %13 + %15 = add <19 x i8> zeroinitializer, %14 + %16 = add <19 x i8> zeroinitializer, %15 + %17 = add <19 x i8> zeroinitializer, %16 + %18 = add <19 x i8> zeroinitializer, %17 + %19 = add <19 x i8> zeroinitializer, %18 + %20 = load i8, i8* undef, align 1 + %21 = insertelement <19 x i8> undef, i8 %20, i32 0 + %22 = shufflevector <19 x i8> %21, <19 x i8> undef, <19 x i32> zeroinitializer + %23 = add nuw nsw i32 0, 5472 + %24 = getelementptr inbounds i8, i8* %0, i32 %23 + %25 = bitcast i8* %24 to <19 x i8>* + %26 = load <19 x i8>, <19 x i8>* %25, align 1 + %27 = mul <19 x i8> %22, %26 + %28 = add <19 x i8> %27, %19 + %29 = add <19 x i8> zeroinitializer, %28 + %30 = add <19 x i8> zeroinitializer, %29 + %31 = add <19 x 
i8> zeroinitializer, %30 + %32 = bitcast i8* %0 to <19 x i8>* + store <19 x i8> %31, <19 x i8>* %32, align 1 + ret void +} + +attributes #0 = { "target-features"="+hvxv66,+hvx-length128b" }