Index: lib/Transforms/Utils/LoopUnroll.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnroll.cpp
+++ lib/Transforms/Utils/LoopUnroll.cpp
@@ -396,8 +396,19 @@
          "Did not expect runtime trip-count unrolling "
          "and peeling for the same loop");
 
-  if (PeelCount)
-    peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+  if (PeelCount) {
+    bool Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);
+
+    // Peeling may change the preheader and trip counts, so on success re-read
+    // them from L and SE; later unrolling must see the updated values.
+    if (Peeled) {
+      BasicBlock *ExitingBlock = L->getExitingBlock();
+      assert(ExitingBlock && "Loop without exiting block?");
+      Preheader = L->getLoopPreheader();
+      TripCount = SE->getSmallConstantTripCount(L, ExitingBlock);
+      TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
+    }
+  }
 
   // Loops containing convergent instructions must have a count that divides
   // their TripMultiple.
Index: test/Transforms/LoopUnroll/SystemZ/lit.local.cfg
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/SystemZ/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'SystemZ' in config.root.targets:
+    config.unsupported = True  # pr33437.ll in this directory uses an s390x triple
Index: test/Transforms/LoopUnroll/SystemZ/pr33437.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/SystemZ/pr33437.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-unroll -S %s | FileCheck %s
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+target triple = "s390x-ibm-linux"
+
+@g_72 = external local_unnamed_addr global i32, align 4
+@g_74 = external local_unnamed_addr global i64, align 8
+@g_126 = external local_unnamed_addr global float, align 4
+@g_202 = external global i64, align 8
+@func_6.l_542 = external unnamed_addr constant [3 x [3 x [10 x i32]]], align 4
+@g_531 = external local_unnamed_addr global i64*, align 8
+@g_623 = external local_unnamed_addr global i32*, align 8
+@g_625 = external local_unnamed_addr global i32*, align 8
+@g_355 = external local_unnamed_addr global [6 x [5 x i32*]], align 8
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
+declare zeroext i8 @patatino()
+
+define fastcc void @func_6() unnamed_addr {
+; CHECK-LABEL: @func_6(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND93:%.*]]
+; CHECK:       for.cond93.loopexit:
+; CHECK-NEXT:    ret void
+; CHECK:       for.cond93:
+; CHECK-NEXT:    br label [[FOR_BODY198_PEEL_BEGIN:%.*]]
+; CHECK:       for.body198.peel.begin:
+; CHECK-NEXT:    br label [[FOR_BODY198_PEEL:%.*]]
+; CHECK:       for.body198.peel:
+; CHECK-NEXT:    store i64 0, i64* @g_74, align 8
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast ([3 x [3 x [10 x i32]]]* @func_6.l_542 to i8*), i64 360, i32 4, i1 false)
+; CHECK-NEXT:    store i64* @g_202, i64** @g_531, align 8
+; CHECK-NEXT:    store i8 0, i8* undef, align 1
+; CHECK-NEXT:    store i8 0, i8* undef, align 1
+; CHECK-NEXT:    store i32* null, i32** @g_623, align 8
+; CHECK-NEXT:    store i32* null, i32** @g_625, align 8
+; CHECK-NEXT:    store i32 0, i32* @g_72, align 4
+; CHECK-NEXT:    [[CALL593_PEEL:%.*]] = tail call zeroext i8 @patatino()
+; CHECK-NEXT:    store i64 -1, i64* undef, align 8
+; CHECK-NEXT:    store i64 6707501019708462161, i64* undef, align 8
+; CHECK-NEXT:    store float 0.000000e+00, float* undef, align 4
+; CHECK-NEXT:    store float 0.000000e+00, float* @g_126, align 4
+; CHECK-NEXT:    store i32* null, i32** getelementptr inbounds ([6 x [5 x i32*]], [6 x [5 x i32*]]* @g_355, i64 0, i64 4, i64 4), align 8
+; CHECK-NEXT:    br i1 false, label [[FOR_BODY198_PEEL_NEXT:%.*]], label [[FOR_COND93_LOOPEXIT:%.*]]
+; CHECK:       for.body198.peel.next:
+; CHECK-NEXT:    br label [[FOR_BODY198_PEEL_NEXT1:%.*]]
+; CHECK:       for.body198.peel.next1:
+; CHECK-NEXT:    br label [[FOR_COND93_PEEL_NEWPH:%.*]]
+; CHECK:       for.cond93.peel.newph:
+; CHECK-NEXT:    br label [[FOR_BODY198:%.*]]
+; CHECK:       for.body198:
+; CHECK-NEXT:    store i64 0, i64* @g_74, align 8
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast ([3 x [3 x [10 x i32]]]* @func_6.l_542 to i8*), i64 360, i32 4, i1 false)
+; CHECK-NEXT:    store i64* @g_202, i64** @g_531, align 8
+; CHECK-NEXT:    store i8 0, i8* undef, align 1
+; CHECK-NEXT:    store i8 0, i8* undef, align 1
+; CHECK-NEXT:    store i32* null, i32** @g_623, align 8
+; CHECK-NEXT:    store i32* null, i32** @g_625, align 8
+; CHECK-NEXT:    store i32 0, i32* @g_72, align 4
+; CHECK-NEXT:    [[CALL593:%.*]] = tail call zeroext i8 @patatino()
+; CHECK-NEXT:    store i64 -1, i64* undef, align 8
+; CHECK-NEXT:    store i64 6707501019708462161, i64* undef, align 8
+; CHECK-NEXT:    store float 0.000000e+00, float* undef, align 4
+; CHECK-NEXT:    store float 0.000000e+00, float* @g_126, align 4
+; CHECK-NEXT:    store i32* null, i32** getelementptr inbounds ([6 x [5 x i32*]], [6 x [5 x i32*]]* @g_355, i64 0, i64 4, i64 4), align 8
+; CHECK-NEXT:    br label [[FOR_COND93_LOOPEXIT]]
+;
+entry:
+  br label %for.cond93
+
+for.cond93.loopexit:                              ; preds = %for.body198
+  ret void
+
+for.cond93:                                       ; preds = %entry
+  br label %for.body198
+
+for.body198:                                      ; preds = %for.body198, %for.cond93
+  %l_249.12 = phi i8 [ undef, %for.cond93 ], [ %call593, %for.body198 ] ; no instruction in @func_6 reads this phi
+  %l_522.01 = phi i32 [ 0, %for.cond93 ], [ 1, %for.body198 ] ; no instruction in @func_6 reads this phi
+  store i64 0, i64* @g_74, align 8
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast ([3 x [3 x [10 x i32]]]* @func_6.l_542 to i8*), i64 360, i32 4, i1 false)
+  store i64* @g_202, i64** @g_531, align 8
+  store i8 0, i8* undef, align 1
+  store i8 0, i8* undef, align 1
+  store i32* null, i32** @g_623, align 8
+  store i32* null, i32** @g_625, align 8
+  store i32 0, i32* @g_72, align 4
+  %call593 = tail call zeroext i8 @patatino()
+  store i64 -1, i64* undef, align 8
+  store i64 6707501019708462161, i64* undef, align 8
+  store float 0.000000e+00, float* undef, align 4
+  store float 0.000000e+00, float* @g_126, align 4
+  store i32* null, i32** getelementptr inbounds ([6 x [5 x i32*]], [6 x [5 x i32*]]* @g_355, i64 0, i64 4, i64 4), align 8
+  br i1 false, label %for.body198, label %for.cond93.loopexit ; constant-false condition: the backedge to %for.body198 is never taken
+}