diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6214,6 +6214,9 @@
         break;
   }
 
+  // Tracks if we tried to vectorize stores starting from the given tail
+  // already.
+  SmallBitVector TriedTails(E, false);
   // For stores that start but don't end a link in the chain:
   for (int Cnt = E; Cnt > 0; --Cnt) {
     int I = Cnt - 1;
@@ -6230,8 +6233,9 @@
         // Mark the new end in the chain and go back, if required. It might be
         // required if the original stores come in reversed order, for example.
         if (ConsecutiveChain[I].first != E &&
-            Tails.test(ConsecutiveChain[I].first) &&
+            Tails.test(ConsecutiveChain[I].first) && !TriedTails.test(I) &&
             !VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {
+          TriedTails.set(I);
           Tails.reset(ConsecutiveChain[I].first);
           if (Cnt < ConsecutiveChain[I].first + 2)
             Cnt = ConsecutiveChain[I].first + 2;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/several_store_chains.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -mtriple=x86_64-unknown -slp-vectorizer | FileCheck %s
+
+; Test for PR49898.
+define void @fusion_1506(i8* %temp_buf1) local_unnamed_addr {
+; CHECK-LABEL: @fusion_1506(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[TEMP_BUF1:%.*]], i64 5621415936
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TEMP_BUF1]], i64 7278166016
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TEMP_BUF1]], i64 5097127936
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP1]] to float*
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 undef
+; CHECK-NEXT:    store float undef, float* [[TMP5]], align 16
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP0]] to float*
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 undef
+; CHECK-NEXT:    store float undef, float* [[TMP7]], align 16
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 undef
+; CHECK-NEXT:    store float undef, float* [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 undef
+; CHECK-NEXT:    store float undef, float* [[TMP9]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = getelementptr inbounds i8, i8* %temp_buf1, i64 5621415936
+  %1 = getelementptr inbounds i8, i8* %temp_buf1, i64 7278166016
+  %2 = getelementptr inbounds i8, i8* %temp_buf1, i64 5097127936
+  %3 = bitcast i8* %2 to float*
+  %4 = bitcast i8* %1 to float*
+  %5 = getelementptr inbounds float, float* %4, i64 undef
+  store float undef, float* %5, align 16
+  %6 = bitcast i8* %0 to float*
+  %7 = getelementptr inbounds float, float* %6, i64 undef
+  store float undef, float* %7, align 16
+  %8 = getelementptr inbounds float, float* %6, i64 undef
+  store float undef, float* %8, align 4
+  %9 = getelementptr inbounds float, float* %3, i64 undef
+  store float undef, float* %9, align 4
+  ret void
+}