Patch summary (descriptive text only; patch tools skip everything that
precedes the first "Index:" line):
  * SimpleLoopUnswitch.cpp: the hasDedicatedExits() assertion that followed
    formLCSSA() is replaced by a call to formDedicatedExitBlocks() with
    PreserveLCSSA set to true; debug output gains "done:" messages and the
    "trivial cases" message is reworded to "trivial switch".
  * Three new .ll tests (tagged PR38283 / PR38737) run opt with
    -disable-output and contain no FileCheck lines, so presumably they only
    guard against crashes -- confirm against the bug reports.

Index: lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
===================================================================
--- lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -304,10 +304,10 @@
     formLCSSA(*OldContainingL, DT, &LI, nullptr);
 
     // We shouldn't need to form dedicated exits because the exit introduced
-    // here is the (just split by unswitching) preheader. As such, it is
-    // necessarily dedicated.
-    assert(OldContainingL->hasDedicatedExits() &&
-           "Unexpected predecessor of hoisted loop preheader!");
+    // here is the (just split by unswitching) preheader. However, trivial
+    // unswitching may leave this loop with non-dedicated exits, so form
+    // dedicated exit blocks conservatively; optimizing this away is a TODO.
+    formDedicatedExitBlocks(OldContainingL, &DT, &LI, /*PreserveLCSSA=*/true);
   }
 }
 
@@ -516,6 +516,7 @@
   if (FullUnswitch)
     hoistLoopToNewParent(L, *NewPH, DT, LI);
 
+  LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
   ++NumTrivial;
   ++NumBranches;
   return true;
@@ -574,7 +575,7 @@
   else if (ExitCaseIndices.empty())
     return false;
 
-  LLVM_DEBUG(dbgs() << " unswitching trivial cases...\n");
+  LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
 
   if (MSSAU && VerifyMemorySSA)
     MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -786,6 +787,7 @@
 
   ++NumTrivial;
   ++NumSwitches;
+  LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
   return true;
 }
 
Index: test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial1.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+; PR38283
+; PR38737
+define void @f1() {
+for.cond1thread-pre-split.lr.ph.lr.ph:
+  %tobool4 = icmp eq i16 undef, 0
+  br label %for.cond1thread-pre-split
+
+for.cond1thread-pre-split: ; preds = %if.end, %for.cond1thread-pre-split.lr.ph.lr.ph
+  %tobool3 = icmp eq i16 undef, 0
+  br label %for.body2
+
+for.body2: ; preds = %if.end6, %for.cond1thread-pre-split
+  br i1 %tobool3, label %if.end, label %for.end
+
+if.end: ; preds = %for.body2
+  br i1 %tobool4, label %if.end6, label %for.cond1thread-pre-split
+
+if.end6: ; preds = %if.end
+  br i1 undef, label %for.body2, label %for.end
+
+for.end: ; preds = %if.end6, %for.body2
+  ret void
+}
Index: test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial2.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -simple-loop-unswitch -disable-output
+
+; PR38283
+; PR38737
+define void @Test(i32) {
+entry:
+  %trunc = trunc i32 %0 to i3
+  br label %outer
+outer:
+  br label %inner
+inner:
+  switch i3 %trunc, label %crit_edge [
+    i3 2, label %break
+    i3 1, label %loopexit
+  ]
+crit_edge:
+  br i1 true, label %loopexit, label %inner
+loopexit:
+  ret void
+break:
+  br label %outer
+}
Index: test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimpleLoopUnswitch/formDedicatedAfterTrivial3.ll
@@ -0,0 +1,105 @@
+; RUN: opt <%s -mtriple=s390x-linux-gnu -O3 -mcpu=z13 -enable-simple-loop-unswitch -disable-output
+
+; PR38283
+; PR38737
+source_filename = "crash0.c"
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+target triple = "s390x-ibm-linux"
+
+@g_450 = external dso_local global [8 x [8 x [4 x i32]]], align 4
+
+; Function Attrs: nounwind
+define dso_local void @main() #0 {
+bb:
+  call void @func_1()
+  unreachable
+}
+
+; Function Attrs: nounwind
+define dso_local void @func_1() #0 {
+bb:
+  %tmp = load i32, i32* getelementptr inbounds ([8 x [8 x [4 x i32]]], [8 x [8 x [4 x i32]]]* @g_450, i64 0, i64 3, i64 7, i64 2), align 4, !tbaa !1
+  call void @func_9(i32 signext %tmp)
+  unreachable
+}
+
+; Function Attrs: nounwind
+define dso_local void @func_9(i32 signext %arg) #0 {
+bb:
+  %tmp = alloca i32, align 4
+  %tmp1 = alloca i32, align 4
+  %tmp2 = alloca i32, align 4
+  %tmp3 = alloca i32, align 4
+  %tmp4 = alloca i32*, align 8
+  store i32 %arg, i32* %tmp, align 4, !tbaa !1
+  store i32* %tmp3, i32** %tmp4, align 8, !tbaa !5
+  br label %bb5
+
+bb5: ; preds = %bb24, %bb
+  store i32 0, i32* %tmp2, align 4, !tbaa !1
+  br label %bb6
+
+bb6: ; preds = %bb19, %bb5
+  %tmp7 = load i32, i32* %tmp2, align 4, !tbaa !1
+  %tmp8 = icmp ne i32 %tmp7, 25
+  br i1 %tmp8, label %bb9, label %bb22
+
+bb9: ; preds = %bb6
+  %tmp10 = load i32, i32* %tmp, align 4, !tbaa !1
+  %tmp11 = icmp ne i32 %tmp10, 0
+  br i1 %tmp11, label %bb12, label %bb13
+
+bb12: ; preds = %bb9
+  store i32 9, i32* %tmp1, align 4
+  br label %bb17
+
+bb13: ; preds = %bb9
+  %tmp14 = load i32, i32* %tmp3, align 4, !tbaa !1
+  %tmp15 = icmp ne i32 %tmp14, 0
+  br i1 %tmp15, label %bb16, label %bb17
+
+bb16: ; preds = %bb13
+  store i32 49, i32* %tmp1, align 4
+  br label %bb17
+
+bb17: ; preds = %bb16, %bb13, %bb12
+  %tmp18 = load i32, i32* %tmp1, align 4
+  switch i32 %tmp18, label %bb24 [
+    i32 0, label %bb19
+    i32 49, label %bb22
+  ]
+
+bb19: ; preds = %bb17
+  %tmp20 = load i32, i32* %tmp2, align 4, !tbaa !1
+  %tmp21 = add nsw i32 %tmp20, 1
+  store i32 %tmp21, i32* %tmp2, align 4, !tbaa !1
+  br label %bb6
+
+bb22: ; preds = %bb17, %bb6
+  br label %bb23
+
+bb23: ; preds = %bb23, %bb22
+  br label %bb23
+
+bb24: ; preds = %bb17
+  %tmp25 = icmp sge i32 0, undef
+  %tmp26 = zext i1 %tmp25 to i32
+  %tmp27 = load i32*, i32** %tmp4, align 8, !tbaa !5
+  %tmp28 = load i32, i32* %tmp27, align 4, !tbaa !1
+  %tmp29 = or i32 %tmp28, %tmp26
+  store i32 %tmp29, i32* %tmp27, align 4, !tbaa !1
+  store i32 0, i32* %tmp1, align 4
+  br label %bb5
+}
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="z13" "target-features"="+transactional-execution,+vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 7.0.0 (http://llvm.org/git/clang.git 31fe70446bc81aa70778c3d3eaa20263eba3c4e5) (http://llvm.org/git/llvm.git 7a4d8f6dc1a41dca2f39a4327b3a4fdd5263ad88)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"any pointer", !3, i64 0}