Set an explicit insertion point before emitting the reuse "shuffle" in the
SLP vectorizer: at VL0 when E->ReorderIndices is non-empty, directly after V
when V is an instruction, and otherwise at the first insertion point of the
function's entry block. Also adds the AArch64 test PR38339.ll, which checks
where the shuffle is emitted.

Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3111,6 +3111,12 @@
           // TODO: Merge this shuffle with the ReorderShuffleMask.
           if (!E->ReorderIndices.empty())
             Builder.SetInsertPoint(VL0);
+          else if (auto *I = dyn_cast<Instruction>(V))
+            Builder.SetInsertPoint(I->getParent(),
+                                   std::next(I->getIterator()));
+          else
+            Builder.SetInsertPoint(&F->getEntryBlock(),
+                                   F->getEntryBlock().getFirstInsertionPt());
           V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
                                           E->ReuseShuffleIndices, "shuffle");
         }
Index: test/Transforms/SLPVectorizer/AArch64/PR38339.ll
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/AArch64/PR38339.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S -mtriple=aarch64-apple-ios -mcpu=cyclone -o - %s | FileCheck %s
+
+@a = dso_local global i16 0, align 2
+
+define void @f1() {
+; CHECK-LABEL: @f1(
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <2 x i16*> undef to <2 x i16>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+; CHECK-NEXT:    [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+; CHECK-NEXT:    [[PTR3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 undef, label [[BB3:%.*]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i16> [[SHUFFLE]], i32 0
+; CHECK-NEXT:    store i16 [[TMP2]], i16* @a, align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16* [[PTR0]] to <4 x i16>*
+; CHECK-NEXT:    store <4 x i16> [[SHUFFLE]], <4 x i16>* [[TMP3]], align 2
+; CHECK-NEXT:    br label [[BB1]]
+; CHECK:       bb3:
+; CHECK-NEXT:    ret void
+;
+  %1 = ptrtoint <2 x i16*> undef to <2 x i16>
+  %2 = extractelement <2 x i16> %1, i32 0
+  %3 = extractelement <2 x i16> %1, i32 1
+  %ptr0 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
+  %ptr1 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
+  %ptr2 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
+  %ptr3 = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 3
+  br label %bb1
+
+bb1:                                              ; preds = %bb2, %0
+  br i1 undef, label %bb3, label %bb2
+
+bb2:                                              ; preds = %bb1
+  store i16 %2, i16* @a, align 2
+  store i16 %2, i16* %ptr0, align 2
+  store i16 %3, i16* %ptr1, align 2
+  store i16 %3, i16* %ptr2, align 2
+  store i16 %2, i16* %ptr3, align 2
+  br label %bb1
+
+bb3:                                              ; preds = %bb1
+  ret void
+}