diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1549,9 +1549,13 @@ const Twine &NamePrefix) { // Create i8 GEP for opaque pointers. if (Ptr->getType()->isOpaquePointerTy()) { - if (Offset != 0) - Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset), + if (Offset != 0) { + Type *Int8Ty = IRB.getInt8Ty(); + // This may fail due to alignment + assert(DL.getTypeAllocSize(Int8Ty) == 1 && "Unexpected I8 size!"); + Ptr = IRB.CreateInBoundsGEP(Int8Ty, Ptr, IRB.getInt(Offset), NamePrefix + "sroa_idx"); + } return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy, NamePrefix + "sroa_cast"); } @@ -1640,11 +1644,14 @@ Int8PtrOffset = Offset; } - OffsetPtr = Int8PtrOffset == 0 - ? Int8Ptr - : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr, - IRB.getInt(Int8PtrOffset), - NamePrefix + "sroa_raw_idx"); + Type *Int8Ty = IRB.getInt8Ty(); + // This may fail due to alignment + assert(DL.getTypeAllocSize(Int8Ty) == 1 && "Unexpected I8 size!"); + OffsetPtr = + Int8PtrOffset == 0 + ? Int8Ptr + : IRB.CreateInBoundsGEP(Int8Ty, Int8Ptr, IRB.getInt(Int8PtrOffset), + NamePrefix + "sroa_raw_idx"); } Ptr = OffsetPtr; diff --git a/llvm/test/Transforms/SROA/overaligned-datalayout.ll b/llvm/test/Transforms/SROA/overaligned-datalayout.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SROA/overaligned-datalayout.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes sroa -S | FileCheck %s --check-prefix=OVERALIGNED +; RUN: grep -v "target datalayout" %s | opt -passes sroa -S | FileCheck %s --check-prefix=NATURAL + +; Test case demonstrating an SROA issue in case of overaligned i8s. +; It is not clear whether SROA should support such cases, +; but the test case at least demonstrates the issue. + +; Non-vectors require at least 32-bit alignment. +target datalayout = "e-m:e-p:64:64-i8:32-i16:32-i32:32-i64:32-f16:32-f32:32-f64:32-n8:16:32" + +; Pack the byte to be accessed into a vector within a struct. +; For some reason, this is necessary to trigger the problematic behavior. +%AllocStruct = type { <3 x i16> } +; The <1 x i8> has an alignment of 8 bits, even if i8 has an alignment of 32 bits, +; so we can use it to access individual bytes. +%ByteAccessStruct = type { <1 x i8> } + +; Alloca a struct containing a <3 x i16>, then access its last byte using ByteAccessStruct. +define i8 @test_i8_access() { +; Note: This returns poison, but should return 2. This is caused by SROA transforming the byte +; access into an i8-based GEP using a byte-offset as index, which is incorrect (and out-of-bounds) +; because i8s have an alignment of 32 bits, and hence an AllocSize of 32 bits. +; OVERALIGNED-LABEL: @test_i8_access( +; OVERALIGNED-NEXT: [[INTVEC:%.*]] = insertelement <3 x i16> undef, i16 513, i32 2 +; OVERALIGNED-NEXT: [[LASTBYTESTR_FCA_0_INSERT:%.*]] = insertvalue [[BYTEACCESSSTRUCT:%.*]] poison, <1 x i8> poison, 0 +; OVERALIGNED-NEXT: [[LASTBYTEVEC:%.*]] = extractvalue [[BYTEACCESSSTRUCT]] [[LASTBYTESTR_FCA_0_INSERT]], 0 +; OVERALIGNED-NEXT: [[LASTBYTEI8:%.*]] = extractelement <1 x i8> [[LASTBYTEVEC]], i32 0 +; OVERALIGNED-NEXT: ret i8 [[LASTBYTEI8]] +; +; NATURAL-LABEL: @test_i8_access( +; NATURAL-NEXT: [[ALLOCA_SROA_0:%.*]] = alloca <3 x i16>, align 16 +; NATURAL-NEXT: [[INTVEC:%.*]] = insertelement <3 x i16> undef, i16 513, i32 2 +; NATURAL-NEXT: store <3 x i16> [[INTVEC]], <3 x i16>* [[ALLOCA_SROA_0]], align 16 +; NATURAL-NEXT: [[ALLOCA_SROA_0_5_LASTBYTESTR_FCA_0_GEP_SROA_RAW_CAST1:%.*]] = bitcast <3 x i16>* [[ALLOCA_SROA_0]] to i8* +; NATURAL-NEXT: [[ALLOCA_SROA_0_5_LASTBYTESTR_FCA_0_GEP_SROA_RAW_IDX2:%.*]] = getelementptr inbounds i8, i8* [[ALLOCA_SROA_0_5_LASTBYTESTR_FCA_0_GEP_SROA_RAW_CAST1]], i64 5 +; NATURAL-NEXT: [[ALLOCA_SROA_0_5_LASTBYTESTR_FCA_0_GEP_SROA_CAST3:%.*]] = bitcast i8* [[ALLOCA_SROA_0_5_LASTBYTESTR_FCA_0_GEP_SROA_RAW_IDX2]] to <1 x i8>* +; NATURAL-NEXT: [[ALLOCA_SROA_0_5_ALLOCA_SROA_0_5_LASTBYTESTR_FCA_0_LOAD:%.*]] = load <1 x i8>, <1 x i8>* [[ALLOCA_SROA_0_5_LASTBYTESTR_FCA_0_GEP_SROA_CAST3]], align 1 +; NATURAL-NEXT: [[LASTBYTESTR_FCA_0_INSERT:%.*]] = insertvalue [[BYTEACCESSSTRUCT:%.*]] poison, <1 x i8> [[ALLOCA_SROA_0_5_ALLOCA_SROA_0_5_LASTBYTESTR_FCA_0_LOAD]], 0 +; NATURAL-NEXT: [[LASTBYTEVEC:%.*]] = extractvalue [[BYTEACCESSSTRUCT]] [[LASTBYTESTR_FCA_0_INSERT]], 0 +; NATURAL-NEXT: [[LASTBYTEI8:%.*]] = extractelement <1 x i8> [[LASTBYTEVEC]], i32 0 +; NATURAL-NEXT: ret i8 [[LASTBYTEI8]] +; + %alloca = alloca %AllocStruct, align 16 + %intvec = insertelement <3 x i16> undef, i16 513, i32 2 + %intvecptr = getelementptr %AllocStruct, %AllocStruct* %alloca, i32 0, i32 0 + store <3 x i16> %intvec, <3 x i16>* %intvecptr + %allocaBytePtr = bitcast %AllocStruct* %alloca to %ByteAccessStruct* + %allocaBytePtrLastByte = getelementptr %ByteAccessStruct, %ByteAccessStruct* %allocaBytePtr, i32 5 + %lastByteStr = load %ByteAccessStruct, %ByteAccessStruct* %allocaBytePtrLastByte + %lastByteVec = extractvalue %ByteAccessStruct %lastByteStr, 0 + %lastByteI8 = extractelement <1 x i8> %lastByteVec, i32 0 + ret i8 %lastByteI8 +}