Index: test/Transforms/LoopUnroll/full-unroll-heuristics-cfg.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnroll/full-unroll-heuristics-cfg.ll @@ -0,0 +1,156 @@ +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-absolute-threshold=1000 -unroll-threshold=10 -unroll-percent-of-optimized-for-complete-unroll=50 | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16 + +; If a load becomes a constant after loop unrolling, we can sometimes simplify +; the CFG. This test verifies that we handle such cases. +; CHECK-LABEL: @branch +; CHECK-NOT: br i1 %exitcond, label %for.end, label %for.body +define i32 @branch(i32* noalias nocapture readonly %b) { +entry: + br label %for.body + +for.body: ; preds = %for.inc, %entry + %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ] + %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ] + %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0 + %x1 = load i32, i32* %arrayidx1, align 4 + %cmp = icmp eq i32 %x1, 0 + %iv.1 = add nuw nsw i64 %iv.0, 1 + br i1 %cmp, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0 + %x2 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %x2, %r.0 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %r.1 = phi i32 [ %add, %if.then ], [ %x1, %for.body ] + %exitcond = icmp eq i64 %iv.1, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc + ret i32 %r.1 +} + +; A branch might also depend on an induction variable - in this case we can +; evaluate it at every iteration, and thus simplify the CFG. 
+; CHECK-LABEL: @branch_iv +; CHECK-NOT: br i1 %exitcond, label %for.end, label %for.body +define i64 @branch_iv(i64* noalias nocapture readonly %b) { +entry: + br label %for.body + +for.body: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ] + %r.030 = phi i64 [ 0, %entry ], [ %r.1, %for.inc ] + %cmp3 = icmp eq i64 %indvars.iv, 5 + %tmp3 = add nuw nsw i64 %indvars.iv, 1 + br i1 %cmp3, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %tmp3 + %tmp1 = load i64, i64* %arrayidx2, align 4 + %add = add nsw i64 %tmp1, %r.030 + br label %for.inc + +for.inc: ; preds = %if.then, %for.body + %r.1 = phi i64 [ %add, %if.then ], [ %r.030, %for.body ] + %exitcond = icmp eq i64 %tmp3, 20 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc + ret i64 %r.1 +} + +; Induction variables are often cast to another type, and that shouldn't +; prevent us from folding branches. This test specifically checks that we can +; handle this. 
+; CHECK-LABEL: @branch_iv_trunc +; CHECK-NOT: br i1 %exitcond, label %for.end, label %for.body +define i32 @branch_iv_trunc(i32* noalias nocapture readonly %b) { +entry: + br label %for.body + +for.body: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ] + %r.030 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ] + %tmp2 = trunc i64 %indvars.iv to i32 + %cmp3 = icmp eq i32 %tmp2, 5 + %tmp3 = add nuw nsw i64 %indvars.iv, 1 + br i1 %cmp3, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %tmp3 + %tmp1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %tmp1, %r.030 + br label %for.inc + +for.inc: ; preds = %if.then, %for.body + %r.1 = phi i32 [ %add, %if.then ], [ %r.030, %for.body ] + %exitcond = icmp eq i64 %tmp3, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc + ret i32 %r.1 +} + +; In this test %i1024 has a step of 1024, and it's truncated to i8 on every +; iteration. The result of this trunc is always 0, and we should be able +; to take advantage of it. +; CHECK-LABEL: @iv_trunc +; CHECK: ret i8 0 +define i8 @iv_trunc() { +entry: + br label %for.body + +for.body: + %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ] + %i1024.0 = phi i64 [ 0, %entry ], [ %i1024.1, %for.body ] + %r.0 = phi i8 [ 1, %entry ], [ %r.1, %for.body ] + %iv.1 = add nuw nsw i64 %iv.0, 1 + %i1024.1 = add nuw nsw i64 %i1024.0, 1024 + %trunc = trunc i64 %i1024.1 to i8 + %r.1 = mul i8 %r.0, %trunc + %exitcond = icmp eq i64 %iv.1, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret i8 %r.1 +} + +; Check that we don't crash when we analyze an icmp of a pointer-typed IV and a pointer. 
+; CHECK-LABEL: @ptr_cmp +define void @ptr_cmp() { +entry: + br label %while.body + +while.body: + %iv.0 = phi i32* [ getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 0), %entry ], [ %iv.1, %while.body ] + %iv.1 = getelementptr inbounds i32, i32* %iv.0, i64 1 + %exitcond = icmp eq i32* %iv.1, getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 9) + br i1 %exitcond, label %loop.exit, label %while.body + +loop.exit: + ret void +} + +; Check that we don't crash when we analyze a ptrtoint cast. +; CHECK-LABEL: @ptrtoint_cast +define void @ptrtoint_cast(i8 * %a) { +entry: + %limit = getelementptr i8, i8* %a, i64 512 + br label %loop.body + +loop.body: + %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ] + %cast = ptrtoint i8* %iv.0 to i64 + %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1 + %exitcond = icmp ne i8* %iv.1, %limit + br i1 %exitcond, label %loop.body, label %loop.exit + +loop.exit: + ret void +}