Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3719,6 +3719,32 @@
   return PA;
 }
 
+namespace {
+/// Tracks whether any basic block has changed. When a change is reported for
+/// a block that belongs to a loop, tells ScalarEvolution to forget the loop
+/// dispositions cached for that loop; otherwise stale ScalarEvolution nodes
+/// could be reused and report wrong loop dispositions.
+class TrackChanges {
+private:
+  ScalarEvolution *SE = nullptr;
+  const LoopInfo *LI = nullptr;
+  const BasicBlock *BB = nullptr; // Block whose changes are being tracked.
+  bool Changed = false;           // True once any change was reported.
+
+public:
+  TrackChanges(ScalarEvolution *SE, const LoopInfo *LI) : SE(SE), LI(LI) {}
+  void setBasicBlock(const BasicBlock *B) { BB = B; }
+  void track(bool C) {
+    if (C) {
+      if (const Loop *L = LI->getLoopFor(BB))
+        SE->forgetLoopDispositions(L);
+      Changed = true;
+    }
+  }
+  bool isChanged() const { return Changed; }
+};
+} // namespace
+
 bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
                                 TargetTransformInfo *TTI_,
                                 TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
@@ -3736,7 +3762,7 @@
   Stores.clear();
   GEPs.clear();
 
-  bool Changed = false;
+  TrackChanges Tracker(SE, LI);
 
   // If the target claims to have no vector registers don't attempt
   // vectorization.
@@ -3758,17 +3784,18 @@
 
   // Scan the blocks in the function in post order.
   for (auto BB : post_order(&F.getEntryBlock())) {
+    Tracker.setBasicBlock(BB);
     collectSeedInstructions(BB);
 
     // Vectorize trees that end at stores.
     if (!Stores.empty()) {
       DEBUG(dbgs() << "SLP: Found stores for " << Stores.size()
                    << " underlying objects.\n");
-      Changed |= vectorizeStoreChains(R);
+      Tracker.track(vectorizeStoreChains(R));
     }
 
     // Vectorize trees that end at reductions.
-    Changed |= vectorizeChainsInBlock(BB, R);
+    Tracker.track(vectorizeChainsInBlock(BB, R));
 
     // Vectorize the index computations of getelementptr instructions. This
     // is primarily intended to catch gather-like idioms ending at
@@ -3776,16 +3803,16 @@
     if (!GEPs.empty()) {
       DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size()
                    << " underlying objects.\n");
-      Changed |= vectorizeGEPIndices(BB, R);
+      Tracker.track(vectorizeGEPIndices(BB, R));
     }
   }
 
-  if (Changed) {
+  if (Tracker.isChanged()) {
     R.optimizeGatherSequence();
     DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
     DEBUG(verifyFunction(F));
   }
-  return Changed;
+  return Tracker.isChanged();
 }
 
 /// \brief Check that the Values in the slice in VL array are still existent in
Index: test/Transforms/SLPVectorizer/X86/crash-SCEV.ll
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/X86/crash-SCEV.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S -o - -mtriple=i386 -mcpu=haswell < %s | FileCheck %s
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+@shift = common local_unnamed_addr global [10 x i32] zeroinitializer, align 4
+@data = common local_unnamed_addr global [10 x i8*] zeroinitializer, align 4
+
+define void @flat(i32 %intensity) {
+; CHECK-LABEL: @flat(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 1, [[TMP0]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 [[SHR]]
+; CHECK-NEXT:    [[SHR1:%.*]] = lshr i32 1, [[TMP1]]
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 [[SHR1]]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[D1_DATA_046:%.*]] = phi i8* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ADD_PTR23_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[Y_045:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i8> undef, i8 [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i8> [[TMP6]], i8 [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = zext <2 x i8> [[TMP7]] to <2 x i32>
+; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i32> <i32 -128, i32 -128>, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp sgt <2 x i32> [[TMP9]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP11:%.*]] = sub nsw <2 x i32> <i32 128, i32 128>, [[TMP8]]
+; CHECK-NEXT:    [[TMP12:%.*]] = select <2 x i1> [[TMP10]], <2 x i32> [[TMP9]], <2 x i32> [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i32> [[TMP12]], i32 1
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
+; CHECK-NEXT:    [[IDX_NEG:%.*]] = sub nsw i32 0, [[ADD]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[IDX_NEG]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i8, i8* [[ADD_PTR]], align 1
+; CHECK-NEXT:    [[CONV15:%.*]] = zext i8 [[TMP15]] to i32
+; CHECK-NEXT:    [[ADD16:%.*]] = add nsw i32 [[CONV15]], [[INTENSITY:%.*]]
+; CHECK-NEXT:    [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8
+; CHECK-NEXT:    store i8 [[CONV17]], i8* [[ADD_PTR]], align 1
+; CHECK-NEXT:    [[ADD_PTR18:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[ADD]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load i8, i8* [[ADD_PTR18]], align 1
+; CHECK-NEXT:    [[NOT_TOBOOL:%.*]] = icmp eq i8 [[TMP16]], 0
+; CHECK-NEXT:    [[CONV21:%.*]] = zext i1 [[NOT_TOBOOL]] to i8
+; CHECK-NEXT:    store i8 [[CONV21]], i8* [[ADD_PTR18]], align 1
+; CHECK-NEXT:    [[ADD_PTR23:%.*]] = getelementptr inbounds i8, i8* [[D1_DATA_046]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load i8, i8* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[TMP18:%.*]] = load i8, i8* [[ARRAYIDX2]], align 1
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x i8> undef, i8 [[TMP18]], i32 0
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <2 x i8> [[TMP19]], i8 [[TMP17]], i32 1
+; CHECK-NEXT:    [[TMP21:%.*]] = zext <2 x i8> [[TMP20]] to <2 x i32>
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw <2 x i32> <i32 -128, i32 -128>, [[TMP21]]
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp sgt <2 x i32> [[TMP22]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP24:%.*]] = sub nsw <2 x i32> <i32 128, i32 128>, [[TMP21]]
+; CHECK-NEXT:    [[TMP25:%.*]] = select <2 x i1> [[TMP23]], <2 x i32> [[TMP22]], <2 x i32> [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <2 x i32> [[TMP25]], i32 0
+; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <2 x i32> [[TMP25]], i32 1
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP26]], [[TMP27]]
+; CHECK-NEXT:    [[IDX_NEG_1:%.*]] = sub nsw i32 0, [[ADD_1]]
+; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[IDX_NEG_1]]
+; CHECK-NEXT:    [[TMP28:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1
+; CHECK-NEXT:    [[CONV15_1:%.*]] = zext i8 [[TMP28]] to i32
+; CHECK-NEXT:    [[ADD16_1:%.*]] = add nsw i32 [[CONV15_1]], [[INTENSITY]]
+; CHECK-NEXT:    [[CONV17_1:%.*]] = trunc i32 [[ADD16_1]] to i8
+; CHECK-NEXT:    store i8 [[CONV17_1]], i8* [[ADD_PTR_1]], align 1
+; CHECK-NEXT:    [[ADD_PTR18_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[ADD_1]]
+; CHECK-NEXT:    [[TMP29:%.*]] = load i8, i8* [[ADD_PTR18_1]], align 1
+; CHECK-NEXT:    [[NOT_TOBOOL_1:%.*]] = icmp eq i8 [[TMP29]], 0
+; CHECK-NEXT:    [[CONV21_1:%.*]] = zext i1 [[NOT_TOBOOL_1]] to i8
+; CHECK-NEXT:    store i8 [[CONV21_1]], i8* [[ADD_PTR18_1]], align 1
+; CHECK-NEXT:    [[ADD_PTR23_1]] = getelementptr inbounds i8, i8* [[ADD_PTR23]], i32 [[TMP1]]
+; CHECK-NEXT:    [[INC_1]] = add nsw i32 [[Y_045]], 2
+; CHECK-NEXT:    [[EXITCOND_1:%.*]] = icmp eq i32 [[INC_1]], 128
+; CHECK-NEXT:    br i1 [[EXITCOND_1]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+entry:
+  %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 0), align 4
+  %1 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @shift, i32 0, i32 1), align 4
+  %2 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 0), align 4
+  %3 = load i8*, i8** getelementptr inbounds ([10 x i8*], [10 x i8*]* @data, i32 0, i32 1), align 4
+  %shr = lshr i32 1, %0
+  %arrayidx = getelementptr inbounds i8, i8* %2, i32 %shr
+  %shr1 = lshr i32 1, %1
+  %arrayidx2 = getelementptr inbounds i8, i8* %3, i32 %shr1
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %d1_data.046 = phi i8* [ %3, %entry ], [ %add.ptr23.1, %for.body ]
+  %y.045 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ]
+  %4 = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %4 to i32
+  %sub = add nsw i32 %conv, -128
+  %5 = load i8, i8* %arrayidx2, align 1
+  %conv3 = zext i8 %5 to i32
+  %sub4 = add nsw i32 %conv3, -128
+  %cmp5 = icmp sgt i32 %sub, -1
+  %sub7 = sub nsw i32 128, %conv
+  %cond = select i1 %cmp5, i32 %sub, i32 %sub7
+  %cmp8 = icmp sgt i32 %sub4, -1
+  %sub12 = sub nsw i32 128, %conv3
+  %cond14 = select i1 %cmp8, i32 %sub4, i32 %sub12
+  %add = add nsw i32 %cond14, %cond
+  %idx.neg = sub nsw i32 0, %add
+  %add.ptr = getelementptr inbounds i8, i8* %d1_data.046, i32 %idx.neg
+  %6 = load i8, i8* %add.ptr, align 1
+  %conv15 = zext i8 %6 to i32
+  %add16 = add nsw i32 %conv15, %intensity
+  %conv17 = trunc i32 %add16 to i8
+  store i8 %conv17, i8* %add.ptr, align 1
+  %add.ptr18 = getelementptr inbounds i8, i8* %d1_data.046, i32 %add
+  %7 = load i8, i8* %add.ptr18, align 1
+  %not.tobool = icmp eq i8 %7, 0
+  %conv21 = zext i1 %not.tobool to i8
+  store i8 %conv21, i8* %add.ptr18, align 1
+  %add.ptr23 = getelementptr inbounds i8, i8* %d1_data.046, i32 %1
+  %8 = load i8, i8* %arrayidx, align 1
+  %conv.1 = zext i8 %8 to i32
+  %sub.1 = add nsw i32 %conv.1, -128
+  %9 = load i8, i8* %arrayidx2, align 1
+  %conv3.1 = zext i8 %9 to i32
+  %sub4.1 = add nsw i32 %conv3.1, -128
+  %cmp5.1 = icmp sgt i32 %sub.1, -1
+  %sub7.1 = sub nsw i32 128, %conv.1
+  %cond.1 = select i1 %cmp5.1, i32 %sub.1, i32 %sub7.1
+  %cmp8.1 = icmp sgt i32 %sub4.1, -1
+  %sub12.1 = sub nsw i32 128, %conv3.1
+  %cond14.1 = select i1 %cmp8.1, i32 %sub4.1, i32 %sub12.1
+  %add.1 = add nsw i32 %cond14.1, %cond.1
+  %idx.neg.1 = sub nsw i32 0, %add.1
+  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %idx.neg.1
+  %10 = load i8, i8* %add.ptr.1, align 1
+  %conv15.1 = zext i8 %10 to i32
+  %add16.1 = add nsw i32 %conv15.1, %intensity
+  %conv17.1 = trunc i32 %add16.1 to i8
+  store i8 %conv17.1, i8* %add.ptr.1, align 1
+  %add.ptr18.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %add.1
+  %11 = load i8, i8* %add.ptr18.1, align 1
+  %not.tobool.1 = icmp eq i8 %11, 0
+  %conv21.1 = zext i1 %not.tobool.1 to i8
+  store i8 %conv21.1, i8* %add.ptr18.1, align 1
+  %add.ptr23.1 = getelementptr inbounds i8, i8* %add.ptr23, i32 %1
+  %inc.1 = add nsw i32 %y.045, 2
+  %exitcond.1 = icmp eq i32 %inc.1, 128
+  br i1 %exitcond.1, label %for.cond.cleanup, label %for.body
+}