Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5481,11 +5481,9 @@ return None; } - ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF); - switch (ScalarEpilogueStatus) { case CM_ScalarEpilogueAllowed: - return MaxVF; + return computeFeasibleMaxVF(TC, UserVF); case CM_ScalarEpilogueNotAllowedUsePredicate: LLVM_FALLTHROUGH; case CM_ScalarEpilogueNotNeededUsePredicate: @@ -5523,7 +5521,7 @@ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a " "scalar epilogue instead.\n"); ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; - return MaxVF; + return computeFeasibleMaxVF(TC, UserVF); } return None; } @@ -5540,6 +5538,7 @@ InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); } + ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF); assert(!MaxVF.isScalable() && "Scalable vectors do not yet support tail folding"); assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) && Index: llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/Hexagon/maximum-vf-crash.ll @@ -0,0 +1,83 @@ +; RUN: opt -march=hexagon -hexagon-autohvx -loop-vectorize -disable-output < %s + +; Check that we don't crash. +; +; Testcase originated from this C code: +; +; typedef struct { +; char a; +; } b; +; +; b *c; +; int d, e; +; +; int f() { +; int g = 0; +; for (; d; d++) { +; e = 0; +; for (; e < c[d].a; e++) +; g++; +; } +; return g; +; } +; +; which was crashing when compiling with: +; +; clang -Os -mhvx -fvectorize -mv67 testcase.c -S -o - +; +; Source of the crash was introduced in D90687. 
+;
+; IR generated by:
+;
+;   ./bin/clang -Os -mhvx -fvectorize -mv67 testcase.c -S -emit-llvm -o testcase.ll
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+%struct.b = type { i8 } ; struct 'b' from the C testcase: a single char member
+
+@d = dso_local local_unnamed_addr global i32 0, align 4 ; loop counter; read in entry, reset to 0 after the loop
+@e = dso_local local_unnamed_addr global i32 0, align 4 ; written once, after the loop exits
+@c = dso_local local_unnamed_addr global %struct.b* null, align 4 ; base pointer of the %struct.b array indexed by the loop
+
+; Function Attrs: optsize
+define dso_local i32 @f() local_unnamed_addr #0 {
+entry:
+  %.pr = load i32, i32* @d, align 4 ; starting value of the loop index
+  %tobool.not15 = icmp eq i32 %.pr, 0
+  br i1 %tobool.not15, label %for.end7, label %for.cond1.preheader.lr.ph ; skip the loop entirely when @d is 0
+
+for.cond1.preheader.lr.ph:                        ; preds = %entry
+  %0 = load %struct.b*, %struct.b** @c, align 4 ; array base, loaded once outside the loop
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.lr.ph, %for.cond1.preheader
+  %g.016 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %g.1.lcssa, %for.cond1.preheader ] ; running sum that @f eventually returns
+  %1 = phi i32 [ %.pr, %for.cond1.preheader.lr.ph ], [ %inc6, %for.cond1.preheader ] ; loop index, starts at the loaded @d
+  %a10 = getelementptr inbounds %struct.b, %struct.b* %0, i32 %1, i32 0 ; address of field 0 of array element %1
+  %2 = load i8, i8* %a10, align 1
+  %cmp12.not = icmp eq i8 %2, 0
+  %conv = zext i8 %2 to i32
+  %3 = icmp ugt i32 %conv, 1
+  %umax = select i1 %3, i32 %conv, i32 1 ; unsigned max(%conv, 1)
+  %4 = select i1 %cmp12.not, i32 0, i32 %umax ; 0 when the loaded byte is 0, otherwise %umax
+  %g.1.lcssa = add i32 %g.016, %4
+  %inc6 = add nsw i32 %1, 1
+  %tobool.not = icmp eq i32 %inc6, 0 ; the loop exits once the incremented index equals 0
+  br i1 %tobool.not, label %for.cond.for.end7_crit_edge, label %for.cond1.preheader, !llvm.loop !0
+
+for.cond.for.end7_crit_edge:                      ; preds = %for.cond1.preheader
+  %inc4.lcssa18 = select i1 %cmp12.not, i32 0, i32 %umax ; same select as %4, re-materialised for the store to @e
+  store i32 %inc4.lcssa18, i32* @e, align 4
+  store i32 0, i32* @d, align 4
+  br label %for.end7
+
+for.end7:                                         ; preds = %for.cond.for.end7_crit_edge, %entry
+  %g.0.lcssa = phi i32 [ %g.1.lcssa, %for.cond.for.end7_crit_edge ], [ 0, %entry ] ; 0 when the loop was skipped
+  ret i32 %g.0.lcssa
+}
+
+attributes #0 = { optsize "target-cpu"="hexagonv67" "target-features"="+hvx-length128b,+hvxv67,+v67,-long-calls" } ; optsize plus the Hexagon v67 / HVX target settings applied to @f
+
+!0 = distinct !{!0, !1} ; loop metadata attached to the loop's back-edge branch via !llvm.loop
+!1 = !{!"llvm.loop.mustprogress"}