diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5179,11 +5179,53 @@ bool ReSchedule = false; LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.OpValue << "\n"); + auto &&TryScheduleBundle = [this, OldScheduleEnd, SLP](bool ReSchedule, + ScheduleData *Bundle) { + // The scheduling region got new instructions at the lower end (or it is a + // new region for the first bundle). This makes it necessary to + // recalculate all dependencies. + // It is seldom that this needs to be done a second time after adding the + // initial bundle to the region. + if (ScheduleEnd != OldScheduleEnd) { + for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) + doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); }); + ReSchedule = true; + } + if (ReSchedule) { + resetSchedule(); + initialFillReadyList(ReadyInsts); + } + if (Bundle) { + LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle + << " in block " << BB->getName() << "\n"); + calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP); + } + + // Now try to schedule the new bundle or (if no bundle) just calculate + // dependencies. As soon as the bundle is "ready" it means that there are no + // cyclic dependencies and we can schedule it. Note that's important that we + // don't "schedule" the bundle yet (see cancelScheduling). + while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) && + !ReadyInsts.empty()) { + ScheduleData *Picked = ReadyInsts.pop_back_val(); + if (Picked->isSchedulingEntity() && Picked->isReady()) + schedule(Picked, ReadyInsts); + } + }; + // Make sure that the scheduling region contains all // instructions of the bundle. for (Value *V : VL) { - if (!extendSchedulingRegion(V, S)) + if (!extendSchedulingRegion(V, S)) { + // If the scheduling region got new instructions at the lower end (or it + // is a new region for the first bundle). This makes it necessary to + // recalculate all dependencies. + // Otherwise the compiler may crash trying to incorrectly calculate + // dependencies and emit instruction in the wrong order at the actual + // scheduling. + TryScheduleBundle(/*ReSchedule=*/false, nullptr); return None; + } } for (Value *V : VL) { @@ -5212,42 +5254,8 @@ BundleMember->FirstInBundle = Bundle; PrevInBundle = BundleMember; } - if (ScheduleEnd != OldScheduleEnd) { - // The scheduling region got new instructions at the lower end (or it is a - // new region for the first bundle). This makes it necessary to - // recalculate all dependencies. - // It is seldom that this needs to be done a second time after adding the - // initial bundle to the region. - for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) { - doForAllOpcodes(I, [](ScheduleData *SD) { - SD->clearDependencies(); - }); - } - ReSchedule = true; - } - if (ReSchedule) { - resetSchedule(); - initialFillReadyList(ReadyInsts); - } assert(Bundle && "Failed to find schedule bundle"); - - LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle << " in block " - << BB->getName() << "\n"); - - calculateDependencies(Bundle, true, SLP); - - // Now try to schedule the new bundle. As soon as the bundle is "ready" it - // means that there are no cyclic dependencies and we can schedule it. - // Note that's important that we don't "schedule" the bundle yet (see - // cancelScheduling). - while (!Bundle->isReady() && !ReadyInsts.empty()) { - - ScheduleData *pickedSD = ReadyInsts.pop_back_val(); - - if (pickedSD->isSchedulingEntity() && pickedSD->isReady()) { - schedule(pickedSD, ReadyInsts); - } - } + TryScheduleBundle(ReSchedule, Bundle); if (!Bundle->isReady()) { cancelScheduling(VL, S.OpValue); return None; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -slp-vectorizer -slp-min-tree-size=2 -slp-threshold=-1000 -slp-max-look-ahead-depth=1 -slp-look-ahead-users-budget=1 -slp-schedule-budget=27 -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define void @exceed(double %0, double %1) { +; CHECK-LABEL: @exceed( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP1:%.*]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 1 +; CHECK-NEXT: [[IX:%.*]] = fmul double [[TMP7]], undef +; CHECK-NEXT: [[IXX0:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX1:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX2:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX3:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX4:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX5:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IX1:%.*]] = fmul double [[TMP7]], undef +; CHECK-NEXT: [[IXX10:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX11:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX12:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX13:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX14:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX15:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX20:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX21:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[IXX22:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 0 +; CHECK-NEXT: [[IX2:%.*]] = fmul double [[TMP8]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[TMP6]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[IXX101:%.*]] = fsub double undef, undef +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double undef, i32 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> , double [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP16:%.*]] = fmul fast <2 x double> [[TMP14]], [[TMP15]] +; CHECK-NEXT: switch i32 undef, label [[BB1:%.*]] [ +; CHECK-NEXT: i32 0, label [[BB2:%.*]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: br label [[LABEL:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x double> [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x double> poison, double [[TMP17]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP16]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP18]], double [[TMP19]], i32 1 +; CHECK-NEXT: br label [[LABEL]] +; CHECK: label: +; CHECK-NEXT: [[TMP21:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP20]], [[BB2]] ] +; CHECK-NEXT: ret void +; +entry: + %i10 = fdiv fast double %0, %1 + %ix = fmul double %i10, undef + %ixx0 = fsub double undef, undef + %ixx1 = fsub double undef, undef + %ixx2 = fsub double undef, undef + %ixx3 = fsub double undef, undef + %ixx4 = fsub double undef, undef + %ixx5 = fsub double undef, undef + %ix1 = fmul double %i10, undef + %ixx10 = fsub double undef, undef + %ixx11 = fsub double undef, undef + %ixx12 = fsub double undef, undef + %ixx13 = fsub double undef, undef + %ixx14 = fsub double undef, undef + %ixx15 = fsub double undef, undef + %ixx20 = fsub double undef, undef + %ixx21 = fsub double undef, undef + %ixx22 = fsub double undef, undef + %i11 = fdiv fast double %0, %1 + %ix2 = fmul double %i11, %i11 + %tmp1 = fadd fast double %i11, %0 + %tmp2 = fadd fast double %0, %1 + %tmp5 = fmul fast double %tmp1, %tmp2 + %tmp15 = fadd fast double %i10, %1 + %tmp25 = fadd fast double %0, %1 + %tmp6 = fmul fast double %tmp15, %tmp25 + %tmp555 = fmul fast double %i10, undef + %ixx101 = fsub double undef, undef + %tmp666 = fmul fast double %1, undef + switch i32 undef, label %bb1 [ + i32 0, label %bb2 + ] + +bb1: ; preds = %entry + br label %label + +bb2: ; preds = %entry + br label %label + +label: ; preds = %bb2, %bb1 + %phi1 = phi double [ %tmp5, %bb1 ], [ %tmp555, %bb2 ] + %phi2 = phi double [ %tmp6, %bb1 ], [ %tmp666, %bb2 ] + ret void +}