diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -780,6 +780,9 @@
         LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
       return PI.setAborted(&LI);
 
+    if (isa<ScalableVectorType>(LI.getType()))
+      return PI.setAborted(&LI);
+
     uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
     return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
   }
@@ -795,6 +798,9 @@
         SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
       return PI.setAborted(&SI);
 
+    if (isa<ScalableVectorType>(ValOp->getType()))
+      return PI.setAborted(&SI);
+
     uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
 
     // If this memory access can be shown to *statically* extend outside the
@@ -1538,6 +1544,8 @@
   Type *ElementTy = Ty->getElementType();
   if (!ElementTy->isSized())
     return nullptr; // We can't GEP through an unsized element.
+  if (isa<ScalableVectorType>(ElementTy))
+    return nullptr;
   APInt ElementSize(Offset.getBitWidth(),
                     DL.getTypeAllocSize(ElementTy).getFixedSize());
   if (ElementSize == 0)
diff --git a/llvm/test/Transforms/SROA/scalable-vectors.ll b/llvm/test/Transforms/SROA/scalable-vectors.ll
--- a/llvm/test/Transforms/SROA/scalable-vectors.ll
+++ b/llvm/test/Transforms/SROA/scalable-vectors.ll
@@ -34,3 +34,50 @@
   %1 = load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %vec.addr
   ret <vscale x 16 x i8> %1
 }
+
+; Test we bail out when using an alloca of a fixed-length vector (VLS) that was
+; bitcast to a scalable vector.
+define <vscale x 4 x i32> @cast_alloca_to_svint32_t(<vscale x 4 x i32> %type.coerce) {
+; CHECK-LABEL: cast_alloca_to_svint32_t
+; CHECK-NEXT: %type = alloca <16 x i32>, align 64
+; CHECK-NEXT: %type.addr = alloca <16 x i32>, align 64
+; CHECK-NEXT: %1 = bitcast <16 x i32>* %type to <vscale x 4 x i32>*
+; CHECK-NEXT: store <vscale x 4 x i32> %type.coerce, <vscale x 4 x i32>* %1, align 16
+; CHECK-NEXT: %type1 = load <16 x i32>, <16 x i32>* %type, align 64
+; CHECK-NEXT: store <16 x i32> %type1, <16 x i32>* %type.addr, align 64
+; CHECK-NEXT: %2 = load <16 x i32>, <16 x i32>* %type.addr, align 64
+; CHECK-NEXT: %3 = bitcast <16 x i32>* %type.addr to <vscale x 4 x i32>*
+; CHECK-NEXT: %4 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %3, align 16
+; CHECK-NEXT: ret <vscale x 4 x i32> %4
+  %type = alloca <16 x i32>
+  %type.addr = alloca <16 x i32>
+  %1 = bitcast <16 x i32>* %type to <vscale x 4 x i32>*
+  store <vscale x 4 x i32> %type.coerce, <vscale x 4 x i32>* %1
+  %type1 = load <16 x i32>, <16 x i32>* %type
+  store <16 x i32> %type1, <16 x i32>* %type.addr
+  %2 = load <16 x i32>, <16 x i32>* %type.addr
+  %3 = bitcast <16 x i32>* %type.addr to <vscale x 4 x i32>*
+  %4 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %3
+  ret <vscale x 4 x i32> %4
+}
+
+; When casting from VLA to VLS via memory, check we bail out when producing a
+; GEP where the element type is a scalable vector.
+define <vscale x 4 x i32> @cast_alloca_from_svint32_t() {
+; CHECK-LABEL: cast_alloca_from_svint32_t
+; CHECK-NEXT: %retval.coerce = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT: %1 = bitcast <vscale x 4 x i32>* %retval.coerce to i8*
+; CHECK-NEXT: %retval.0..sroa_cast = bitcast i8* %1 to <16 x i32>*
+; CHECK-NEXT: store <16 x i32> undef, <16 x i32>* %retval.0..sroa_cast, align 16
+; CHECK-NEXT: %2 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %retval.coerce, align 16
+; CHECK-NEXT: ret <vscale x 4 x i32> %2
+  %retval = alloca <16 x i32>
+  %retval.coerce = alloca <vscale x 4 x i32>
+  %1 = bitcast <vscale x 4 x i32>* %retval.coerce to i8*
+  %2 = bitcast <16 x i32>* %retval to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %2, i64 64, i1 false)
+  %3 = load <vscale x 4 x i32>, <vscale x 4 x i32>* %retval.coerce
+  ret <vscale x 4 x i32> %3
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
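
Note on the failure mode, to save reviewers a trip into the sources: DL.getTypeStoreSize() returns a TypeSize, and calling getFixedSize() on a scalable quantity trips an assertion in asserts builds, which is why the load/store visitors and the natural-GEP helper above bail out before asking for a fixed size. The following is a minimal standalone sketch of that guard, not part of the patch; the file name and the printed messages are illustrative only, and it assumes the LLVM C++ API as of this change (ScalableVectorType, TypeSize::getFixedSize):

// sroa_scalable_sketch.cpp -- illustrative only, not part of the patch.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  DataLayout DL("");  // default data layout is enough for this demo

  // <vscale x 4 x i32>: the store size is only known up to a runtime multiple.
  Type *ScalableTy = ScalableVectorType::get(Type::getInt32Ty(Ctx), 4);

  // Mirrors the new SROA guard: skip the fixed-size query for scalable types,
  // since TypeSize::getFixedSize() asserts when the size is scalable.
  if (isa<ScalableVectorType>(ScalableTy)) {
    errs() << "scalable vector value: abort slice building for this use\n";
    return 0;
  }

  uint64_t Size = DL.getTypeStoreSize(ScalableTy).getFixedSize();
  errs() << "fixed store size in bytes: " << Size << "\n";
  return 0;
}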