diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1976,6 +1976,7 @@ // Collect the candidate types for vector-based promotion. Also track whether // we have different element types. SmallVector CandidateTys; + SetVector LoadStoreTys; Type *CommonEltTy = nullptr; VectorType *CommonVecPtrTy = nullptr; bool HaveVecPtrTy = false; @@ -2009,15 +2010,37 @@ } } }; - // Consider any loads or stores that are the exact size of the slice. - for (const Slice &S : P) - if (S.beginOffset() == P.beginOffset() && - S.endOffset() == P.endOffset()) { - if (auto *LI = dyn_cast(S.getUse()->getUser())) - CheckCandidateType(LI->getType()); - else if (auto *SI = dyn_cast(S.getUse()->getUser())) - CheckCandidateType(SI->getValueOperand()->getType()); + // Put load and store types into a set for de-duplication. + for (const Slice &S : P) { + Type *Ty; + if (auto *LI = dyn_cast(S.getUse()->getUser())) + Ty = LI->getType(); + else if (auto *SI = dyn_cast(S.getUse()->getUser())) + Ty = SI->getValueOperand()->getType(); + else + continue; + LoadStoreTys.insert(Ty); + // Consider any loads or stores that are the exact size of the slice. + if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset()) + CheckCandidateType(Ty); + } + // Consider additional vector types where the element type size is a + // multiple of load/store element size. 
+  for (Type *Ty : LoadStoreTys) {
+    if (!VectorType::isValidElementType(Ty))
+      continue;
+    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+    // Walk a snapshot: CheckCandidateType() may grow CandidateTys mid-loop.
+    const auto Snapshot = CandidateTys;
+    for (VectorType *VTy : Snapshot) {
+      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+      unsigned ElementSize =
+          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+      if (TypeSize != VectorSize && TypeSize != ElementSize &&
+          VectorSize % TypeSize == 0)
+        CheckCandidateType(VectorType::get(Ty, VectorSize / TypeSize, false));
     }
+  }
 
   // If we didn't find a vector type, nothing to do here.
   if (CandidateTys.empty())
diff --git a/llvm/test/Transforms/SROA/pr57796.ll b/llvm/test/Transforms/SROA/pr57796.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SROA/pr57796.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
+; RUN: opt < %s -passes='sroa' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
+
+%struct.Value = type { %union.anon }
+%union.anon = type { <32 x i8> }
+
+@A = dso_local global i64 0, align 8
+
+; Make sure that sroa does not crash when dealing with an invalid vector
+; element type.
+define void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca [[STRUCT_VALUE:%.*]], align 32 +; CHECK-NEXT: call void @value_create(ptr sret([[STRUCT_VALUE]]) align 32 [[REF_TMP_I]]) +; CHECK-NEXT: [[CALL_I:%.*]] = call align 32 ptr @value_set_type(ptr align 32 [[REF_TMP_I]]) +; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[CALL_I]], align 32 +; CHECK-NEXT: [[REF_TMP_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <32 x i8> [[TMP0]], <32 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[REF_TMP_SROA_0_0_VEC_EXTRACT]] to x86_mmx +; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx [[TMP1]], i8 0) +; CHECK-NEXT: store x86_mmx [[TMP2]], ptr @A, align 8 +; CHECK-NEXT: ret void +; +entry: + %ref.tmp.i = alloca %struct.Value, align 32 + %ref.tmp = alloca %struct.Value, align 32 + call void @value_create(ptr sret(%struct.Value) align 32 %ref.tmp.i) + %call.i = call align 32 ptr @value_set_type(ptr align 32 %ref.tmp.i) + %0 = load <32 x i8>, ptr %call.i, align 32 + store <32 x i8> %0, ptr %ref.tmp, align 32 + %1 = load x86_mmx, ptr %ref.tmp, align 32 + %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0) + store x86_mmx %2, ptr @A, align 8 + ret void +} + +declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8 immarg) + +declare dso_local void @value_create(ptr sret(%struct.Value) align 32) + +declare dso_local align 32 ptr @value_set_type(ptr align 32) + +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-MODIFY-CFG: {{.*}} +; CHECK-PRESERVE-CFG: {{.*}} diff --git a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll --- a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll +++ b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll @@ -46,12 +46,15 @@ ; CHECK-LABEL: @test_memset( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -217,7 +220,7 @@ ; CHECK: bb: ; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0 ; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_4_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0 +; CHECK-NEXT: 
[[B_BLOCKWISE_COPY_SROA_4_16_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0 ; CHECK-NEXT: ret void ; entry: @@ -285,12 +288,15 @@ ; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5:%.*]] = alloca <8 x half>, align 16 ; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_5]], i8 0, i32 16, i1 false) ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: @@ -315,12 +321,15 @@ ; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_5:%.*]] = alloca <8 x half>, align 16 ; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_5]], i8 0, i32 16, i1 false) ; CHECK-NEXT: [[DATA:%.*]] = load <4 x float>, ptr undef, align 16 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x half> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16> ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; 
CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 0 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 1 -; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x half> [[TMP0]], i32 2 +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half +; CHECK-NEXT: [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ b/llvm/test/Transforms/SROA/vector-promotion.ll @@ -891,34 +891,26 @@ ; cast to a different vector type ; CHECK-LABEL: @test13( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca <2 x i64>, align 16 -; CHECK-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 16 -; CHECK-NEXT: [[X_SROA_0_4_X_TMP2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4 -; CHECK-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_X_TMP2_SROA_IDX1]], align 4 -; CHECK-NEXT: [[X_SROA_0_8_X_TMP3_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 8 -; CHECK-NEXT: store i32 [[C:%.*]], ptr [[X_SROA_0_8_X_TMP3_SROA_IDX2]], align 8 -; CHECK-NEXT: [[X_SROA_0_12_X_TMP4_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 12 -; CHECK-NEXT: store i32 [[D:%.*]], ptr [[X_SROA_0_12_X_TMP4_SROA_IDX3]], align 4 -; CHECK-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <2 x i64>, ptr 
[[X_SROA_0]], align 16 -; CHECK-NEXT: ret <2 x i64> [[X_SROA_0_0_X_SROA_0_0_RESULT]] +; CHECK-NEXT: [[X_SROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 +; CHECK-NEXT: [[X_SROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_0_VEC_INSERT]], i32 [[B:%.*]], i32 1 +; CHECK-NEXT: [[X_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_4_VEC_INSERT]], i32 [[C:%.*]], i32 2 +; CHECK-NEXT: [[X_SROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_8_VEC_INSERT]], i32 [[D:%.*]], i32 3 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[X_SROA_0_12_VEC_INSERT]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP0]] ; ; DEBUG-LABEL: @test13( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: [[X_SROA_0:%.*]] = alloca <2 x i64>, align 16, !dbg [[DBG354:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG354]] -; DEBUG-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 16, !dbg [[DBG355:![0-9]+]] +; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG354:![0-9]+]] +; DEBUG-NEXT: [[X_SROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0, !dbg [[DBG355:![0-9]+]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META350:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356:![0-9]+]] -; DEBUG-NEXT: [[X_SROA_0_4_X_TMP2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4, !dbg [[DBG357:![0-9]+]] -; DEBUG-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_X_TMP2_SROA_IDX1]], align 4, !dbg [[DBG357]] +; DEBUG-NEXT: [[X_SROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_0_VEC_INSERT]], i32 [[B:%.*]], i32 1, !dbg [[DBG357:![0-9]+]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META351:![0-9]+]], metadata !DIExpression()), !dbg [[DBG358:![0-9]+]] -; DEBUG-NEXT: [[X_SROA_0_8_X_TMP3_SROA_IDX2:%.*]] = 
getelementptr inbounds i8, ptr [[X_SROA_0]], i64 8, !dbg [[DBG359:![0-9]+]] -; DEBUG-NEXT: store i32 [[C:%.*]], ptr [[X_SROA_0_8_X_TMP3_SROA_IDX2]], align 8, !dbg [[DBG359]] +; DEBUG-NEXT: [[X_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_4_VEC_INSERT]], i32 [[C:%.*]], i32 2, !dbg [[DBG359:![0-9]+]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META352:![0-9]+]], metadata !DIExpression()), !dbg [[DBG360:![0-9]+]] -; DEBUG-NEXT: [[X_SROA_0_12_X_TMP4_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 12, !dbg [[DBG361:![0-9]+]] -; DEBUG-NEXT: store i32 [[D:%.*]], ptr [[X_SROA_0_12_X_TMP4_SROA_IDX3]], align 4, !dbg [[DBG361]] -; DEBUG-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <2 x i64>, ptr [[X_SROA_0]], align 16, !dbg [[DBG362:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i64> [[X_SROA_0_0_X_SROA_0_0_RESULT]], metadata [[META353:![0-9]+]], metadata !DIExpression()), !dbg [[DBG362]] -; DEBUG-NEXT: ret <2 x i64> [[X_SROA_0_0_X_SROA_0_0_RESULT]], !dbg [[DBG363:![0-9]+]] +; DEBUG-NEXT: [[X_SROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_8_VEC_INSERT]], i32 [[D:%.*]], i32 3, !dbg [[DBG361:![0-9]+]] +; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[X_SROA_0_12_VEC_INSERT]] to <2 x i64>, !dbg [[DBG362:![0-9]+]] +; DEBUG-NEXT: call void @llvm.dbg.value(metadata <2 x i64> [[TMP0]], metadata [[META353:![0-9]+]], metadata !DIExpression()), !dbg [[DBG362]] +; DEBUG-NEXT: ret <2 x i64> [[TMP0]], !dbg [[DBG363:![0-9]+]] ; entry: %x = alloca [4 x i32] @@ -938,43 +930,35 @@ ; cast to a different vector type ; CHECK-LABEL: @test14( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[X_ADDR:%.*]] = alloca <2 x i64>, align 16 -; CHECK-NEXT: store <2 x i64> [[X:%.*]], ptr [[X_ADDR]], align 16 -; CHECK-NEXT: [[X_ADDR_0_A:%.*]] = load i32, ptr [[X_ADDR]], align 16 -; CHECK-NEXT: [[X_ADDR_4_X_TMP2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR]], i64 4 -; CHECK-NEXT: [[X_ADDR_4_B:%.*]] = load 
i32, ptr [[X_ADDR_4_X_TMP2_SROA_IDX]], align 4 -; CHECK-NEXT: [[X_ADDR_8_X_TMP3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR]], i64 8 -; CHECK-NEXT: [[X_ADDR_8_C:%.*]] = load i32, ptr [[X_ADDR_8_X_TMP3_SROA_IDX]], align 8 -; CHECK-NEXT: [[X_ADDR_12_X_TMP4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR]], i64 12 -; CHECK-NEXT: [[X_ADDR_12_D:%.*]] = load i32, ptr [[X_ADDR_12_X_TMP4_SROA_IDX]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X_ADDR_0_A]], [[X_ADDR_4_B]] -; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X_ADDR_8_C]], [[X_ADDR_12_D]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32> +; CHECK-NEXT: [[X_ADDR_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0 +; CHECK-NEXT: [[X_ADDR_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1 +; CHECK-NEXT: [[X_ADDR_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 +; CHECK-NEXT: [[X_ADDR_SROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X_ADDR_SROA_0_0_VEC_EXTRACT]], [[X_ADDR_SROA_0_4_VEC_EXTRACT]] +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X_ADDR_SROA_0_8_VEC_EXTRACT]], [[X_ADDR_SROA_0_12_VEC_EXTRACT]] ; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], [[ADD1]] ; CHECK-NEXT: ret i32 [[ADD2]] ; ; DEBUG-LABEL: @test14( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: [[X_ADDR:%.*]] = alloca <2 x i64>, align 16, !dbg [[DBG378:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[X_ADDR]], metadata [[META366:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378]] -; DEBUG-NEXT: store <2 x i64> [[X:%.*]], ptr [[X_ADDR]], align 16, !dbg [[DBG379:![0-9]+]] +; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META366:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378:![0-9]+]] +; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>, !dbg [[DBG379:![0-9]+]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META367:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG380:![0-9]+]] -; DEBUG-NEXT: [[X_ADDR_0_A:%.*]] = load i32, ptr [[X_ADDR]], align 16, !dbg [[DBG381:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_0_A]], metadata [[META368:![0-9]+]], metadata !DIExpression()), !dbg [[DBG381]] +; DEBUG-NEXT: [[X_ADDR_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0, !dbg [[DBG381:![0-9]+]] +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_0_VEC_EXTRACT]], metadata [[META368:![0-9]+]], metadata !DIExpression()), !dbg [[DBG381]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META369:![0-9]+]], metadata !DIExpression()), !dbg [[DBG382:![0-9]+]] -; DEBUG-NEXT: [[X_ADDR_4_X_TMP2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR]], i64 4, !dbg [[DBG383:![0-9]+]] -; DEBUG-NEXT: [[X_ADDR_4_B:%.*]] = load i32, ptr [[X_ADDR_4_X_TMP2_SROA_IDX]], align 4, !dbg [[DBG383]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_4_B]], metadata [[META370:![0-9]+]], metadata !DIExpression()), !dbg [[DBG383]] +; DEBUG-NEXT: [[X_ADDR_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1, !dbg [[DBG383:![0-9]+]] +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_4_VEC_EXTRACT]], metadata [[META370:![0-9]+]], metadata !DIExpression()), !dbg [[DBG383]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META371:![0-9]+]], metadata !DIExpression()), !dbg [[DBG384:![0-9]+]] -; DEBUG-NEXT: [[X_ADDR_8_X_TMP3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR]], i64 8, !dbg [[DBG385:![0-9]+]] -; DEBUG-NEXT: [[X_ADDR_8_C:%.*]] = load i32, ptr [[X_ADDR_8_X_TMP3_SROA_IDX]], align 8, !dbg [[DBG385]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_8_C]], metadata [[META372:![0-9]+]], metadata !DIExpression()), !dbg [[DBG385]] +; DEBUG-NEXT: [[X_ADDR_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2, !dbg [[DBG385:![0-9]+]] +; 
DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_8_VEC_EXTRACT]], metadata [[META372:![0-9]+]], metadata !DIExpression()), !dbg [[DBG385]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META373:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386:![0-9]+]] -; DEBUG-NEXT: [[X_ADDR_12_X_TMP4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR]], i64 12, !dbg [[DBG387:![0-9]+]] -; DEBUG-NEXT: [[X_ADDR_12_D:%.*]] = load i32, ptr [[X_ADDR_12_X_TMP4_SROA_IDX]], align 4, !dbg [[DBG387]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_12_D]], metadata [[META374:![0-9]+]], metadata !DIExpression()), !dbg [[DBG387]] -; DEBUG-NEXT: [[ADD:%.*]] = add i32 [[X_ADDR_0_A]], [[X_ADDR_4_B]], !dbg [[DBG388:![0-9]+]] +; DEBUG-NEXT: [[X_ADDR_SROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3, !dbg [[DBG387:![0-9]+]] +; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_12_VEC_EXTRACT]], metadata [[META374:![0-9]+]], metadata !DIExpression()), !dbg [[DBG387]] +; DEBUG-NEXT: [[ADD:%.*]] = add i32 [[X_ADDR_SROA_0_0_VEC_EXTRACT]], [[X_ADDR_SROA_0_4_VEC_EXTRACT]], !dbg [[DBG388:![0-9]+]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[ADD]], metadata [[META375:![0-9]+]], metadata !DIExpression()), !dbg [[DBG388]] -; DEBUG-NEXT: [[ADD1:%.*]] = add i32 [[X_ADDR_8_C]], [[X_ADDR_12_D]], !dbg [[DBG389:![0-9]+]] +; DEBUG-NEXT: [[ADD1:%.*]] = add i32 [[X_ADDR_SROA_0_8_VEC_EXTRACT]], [[X_ADDR_SROA_0_12_VEC_EXTRACT]], !dbg [[DBG389:![0-9]+]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[ADD1]], metadata [[META376:![0-9]+]], metadata !DIExpression()), !dbg [[DBG389]] ; DEBUG-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], [[ADD1]], !dbg [[DBG390:![0-9]+]] ; DEBUG-NEXT: call void @llvm.dbg.value(metadata i32 [[ADD2]], metadata [[META377:![0-9]+]], metadata !DIExpression()), !dbg [[DBG390]]