diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-complicate-add-rec.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-complicate-add-rec.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-complicate-add-rec.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; This is compiled from the following code +; #include +; int *sv_2mortal(int*); +; void Perl_pp_slice(int **mark, int length) { +; int **dst = mark + 3; +; for (int i = length; i; i--) { +; sv_2mortal(*dst); /* free them eventually */ +; dst++; +; } +; } + +; RUN: opt < %s -passes="loop-reduce" -S | FileCheck %s + +target datalayout = "e-p:32:32:32-n32" + +define void @Perl_pp_slice(ptr %mark, i32 signext %length) { +; CHECK-LABEL: @Perl_pp_slice( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds ptr, ptr [[MARK:%.*]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[DEC:%.*]], [[FOR_BODY]] ], [ [[LENGTH]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[ADD_PTR]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DST_04]], align 8 +; CHECK-NEXT: call void @sv_2mortal(ptr [[TMP0]]) +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1 +; CHECK-NEXT: [[DEC]] = add i32 [[I_05]], -1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; +entry: + %tobool.not3 = icmp eq i32 %length, 0 + br 
i1 %tobool.not3, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %add.ptr = getelementptr inbounds ptr, ptr %mark + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %for.body, %for.body.preheader + %i.05 = phi i32 [ %dec, %for.body ], [ %length, %for.body.preheader ] + %dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %add.ptr, %for.body.preheader ] + %0 = load ptr, ptr %dst.04, align 8 + call void @sv_2mortal(ptr %0) + %incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1 + %dec = add nsw i32 %i.05, -1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %for.cond.cleanup, label %for.body +} + +declare void @sv_2mortal(ptr) diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-const-tripcount.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-const-tripcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-const-tripcount.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; This is compiled from the following code +; typedef unsigned char Intersection; +; extern Intersection board[421]; +; void close_bubbles(int gb[400], int bubbles[400]) { +; int ii; +; for (ii = 21; ii < 400; ii++) { +; if (!(board[ii] != 3) || gb[ii]) +; continue; +; if (bubbles[ii] == 1) +; gb[ii] = 1; +; if (bubbles[ii] == 2) +; gb[ii] = -1; +; } +; } + +; RUN: opt < %s -passes="loop-reduce" -S | FileCheck %s + +@board = external global [421 x i8], align 8 + +define void @close_bubbles(ptr %gb, ptr %bubbles) { +; CHECK-LABEL: @close_bubbles( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[BUBBLES:%.*]], i64 84 +; CHECK-NEXT: [[UGLYGEP3:%.*]] = getelementptr i8, ptr [[GB:%.*]], i64 84 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV6:%.*]] = phi ptr [ [[UGLYGEP7:%.*]], [[FOR_INC:%.*]] ], [ getelementptr (i8, 
ptr @board, i64 21), [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV4:%.*]] = phi ptr [ [[UGLYGEP5:%.*]], [[FOR_INC]] ], [ [[UGLYGEP3]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_INC]] ], [ [[UGLYGEP]], [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_INC]] ], [ 379, [[ENTRY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[LSR_IV6]], align 1 +; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i8 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[CMP1_NOT]], label [[FOR_INC]], label [[LOR_LHS_FALSE:%.*]] +; CHECK: lor.lhs.false: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[LSR_IV4]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[FOR_INC]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[CMP7:%.*]] = icmp eq i32 [[TMP2]], 1 +; CHECK-NEXT: br i1 [[CMP7]], label [[IF_THEN9:%.*]], label [[IF_END12:%.*]] +; CHECK: if.then9: +; CHECK-NEXT: store i32 1, ptr [[LSR_IV4]], align 4 +; CHECK-NEXT: [[DOTPR:%.*]] = load i32, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: br label [[IF_END12]] +; CHECK: if.end12: +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[DOTPR]], [[IF_THEN9]] ], [ [[TMP2]], [[IF_END]] ] +; CHECK-NEXT: [[CMP15:%.*]] = icmp eq i32 [[TMP3]], 2 +; CHECK-NEXT: br i1 [[CMP15]], label [[IF_THEN17:%.*]], label [[FOR_INC]] +; CHECK: if.then17: +; CHECK-NEXT: store i32 -1, ptr [[LSR_IV4]], align 4 +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[UGLYGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 4 +; CHECK-NEXT: [[UGLYGEP7]] = getelementptr i8, ptr [[LSR_IV6]], i64 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void 
+; +entry: + %uglygep = getelementptr i8, ptr %bubbles, i64 84 + %uglygep3 = getelementptr i8, ptr %gb, i64 84 + br label %for.body + +for.body: ; preds = %for.inc, %entry + %lsr.iv6 = phi ptr [ %uglygep7, %for.inc ], [ getelementptr (i8, ptr @board, i64 21), %entry ] + %lsr.iv4 = phi ptr [ %uglygep5, %for.inc ], [ %uglygep3, %entry ] + %lsr.iv1 = phi ptr [ %uglygep2, %for.inc ], [ %uglygep, %entry ] + %lsr.iv = phi i64 [ %lsr.iv.next, %for.inc ], [ 379, %entry ] + %0 = load i8, ptr %lsr.iv6, align 1 + %cmp1.not = icmp eq i8 %0, 3 + br i1 %cmp1.not, label %for.inc, label %lor.lhs.false + +lor.lhs.false: ; preds = %for.body + %1 = load i32, ptr %lsr.iv4, align 4 + %tobool.not = icmp eq i32 %1, 0 + br i1 %tobool.not, label %if.end, label %for.inc + +if.end: ; preds = %lor.lhs.false + %2 = load i32, ptr %lsr.iv1, align 4 + %cmp7 = icmp eq i32 %2, 1 + br i1 %cmp7, label %if.then9, label %if.end12 + +if.then9: ; preds = %if.end + store i32 1, ptr %lsr.iv4, align 4 + %.pr = load i32, ptr %lsr.iv1, align 4 + br label %if.end12 + +if.end12: ; preds = %if.then9, %if.end + %3 = phi i32 [ %.pr, %if.then9 ], [ %2, %if.end ] + %cmp15 = icmp eq i32 %3, 2 + br i1 %cmp15, label %if.then17, label %for.inc + +if.then17: ; preds = %if.end12 + store i32 -1, ptr %lsr.iv4, align 4 + br label %for.inc + +for.inc: ; preds = %if.then17, %if.end12, %lor.lhs.false, %for.body + %lsr.iv.next = add nsw i64 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4 + %uglygep5 = getelementptr i8, ptr %lsr.iv4, i64 4 + %uglygep7 = getelementptr i8, ptr %lsr.iv6, i64 1 + %exitcond.not = icmp eq i64 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.inc + ret void +} diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-runtime-tripcount.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-runtime-tripcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-fold-iv-runtime-tripcount.ll @@ -0,0 
+1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; This is compiled from the following code (NOTE(review): the snippet below is close_bubbles, but the IR in this file defines @DigitizeSequence; confirm and substitute the matching source) +; typedef unsigned char Intersection; +; extern Intersection board[421]; +; void close_bubbles(int gb[400], int bubbles[400]) { +; int ii; +; for (ii = 21; ii < 400; ii++) { +; if (!(board[ii] != 3) || gb[ii]) +; continue; +; if (bubbles[ii] == 1) +; gb[ii] = 1; +; if (bubbles[ii] == 2) +; gb[ii] = -1; +; } +; } + +; RUN: opt < %s -passes="loop-reduce" -S | FileCheck %s + +target datalayout = "e-p:32:32:32-n32" + +@Alphabet_iupac = global i32 0, align 4 +@Alphabet = global [25 x i8] zeroinitializer, align 8 + +define ptr @DigitizeSequence(ptr %seq, i32 signext %L) { +; CHECK-LABEL: @DigitizeSequence( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L:%.*]], 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 [[CONV]]) +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @Alphabet_iupac, align 4 +; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8 +; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[L]], 1 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[ADD2]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[CALL]], i64 [[IDXPROM]] +; CHECK-NEXT: store i8 [[CONV1]], ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: store i8 [[CONV1]], ptr [[CALL]], align 1 +; CHECK-NEXT: [[CMP_NOT31:%.*]] = icmp slt i32 [[L]], 1 +; CHECK-NEXT: br i1 [[CMP_NOT31]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[CALL5:%.*]] = tail call ptr @__ctype_toupper_loc() +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[CALL5]], align 8 +; CHECK-NEXT: [[SUB16:%.*]] = add i32 [[TMP0]], 255 +; CHECK-NEXT: [[CONV1730:%.*]] = zext i32 [[SUB16]] to i64 +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[ADD2]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[CALL]], i64 1 
+; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV3:%.*]] = phi ptr [ [[UGLYGEP4:%.*]], [[FOR_BODY]] ], [ [[SEQ:%.*]], [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP2]], [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[LSR_IV3]], align 1 +; CHECK-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP3]] to i64 +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM9]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 +; CHECK-NEXT: [[CONV12:%.*]] = and i32 [[TMP4]], 255 +; CHECK-NEXT: [[CALL13:%.*]] = tail call ptr @strchr(ptr @Alphabet, i32 signext [[CONV12]]) +; CHECK-NEXT: [[CMP14:%.*]] = icmp eq ptr [[CALL13]], null +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[CALL13]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], ptrtoint (ptr @Alphabet to i64) +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP14]], i64 [[CONV1730]], i64 [[SUB_PTR_SUB]] +; CHECK-NEXT: [[CONV18:%.*]] = trunc i64 [[COND]] to i8 +; CHECK-NEXT: store i8 [[CONV18]], ptr [[LSR_IV1]], align 1 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 1 +; CHECK-NEXT: [[UGLYGEP4]] = getelementptr i8, ptr [[LSR_IV3]], i64 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret ptr [[CALL]] +; +entry: + %add = add nsw i32 %L, 2 + %conv = sext i32 %add to i64 + %call = tail call noalias ptr @malloc(i64 %conv) + %0 = load i32, ptr @Alphabet_iupac, align 4 + %conv1 = trunc i32 %0 to i8 + %add2 = add i32 %L, 1 + %idxprom = sext i32 %add2 to 
i64 + %arrayidx = getelementptr inbounds i8, ptr %call, i64 %idxprom + store i8 %conv1, ptr %arrayidx, align 1 + store i8 %conv1, ptr %call, align 1 + %cmp.not31 = icmp slt i32 %L, 1 + br i1 %cmp.not31, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %call5 = tail call ptr @__ctype_toupper_loc() + %1 = load ptr, ptr %call5, align 8 + %sub16 = add i32 %0, 255 + %conv1730 = zext i32 %sub16 to i64 + %wide.trip.count = zext i32 %add2 to i64 + %2 = add nsw i64 %wide.trip.count, -1 + %uglygep = getelementptr i8, ptr %call, i64 1 + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %lsr.iv3 = phi ptr [ %uglygep4, %for.body ], [ %seq, %for.body.lr.ph ] + %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %for.body.lr.ph ] + %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ %2, %for.body.lr.ph ] + %3 = load i8, ptr %lsr.iv3, align 1 + %idxprom9 = zext i8 %3 to i64 + %arrayidx10 = getelementptr inbounds i32, ptr %1, i64 %idxprom9 + %4 = load i32, ptr %arrayidx10, align 4 + %conv12 = and i32 %4, 255 + %call13 = tail call ptr @strchr(ptr @Alphabet, i32 signext %conv12) + %cmp14 = icmp eq ptr %call13, null + %sub.ptr.lhs.cast = ptrtoint ptr %call13 to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, ptrtoint (ptr @Alphabet to i64) + %cond = select i1 %cmp14, i64 %conv1730, i64 %sub.ptr.sub + %conv18 = trunc i64 %cond to i8 + store i8 %conv18, ptr %lsr.iv1, align 1 + %lsr.iv.next = add nsw i64 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 1 + %uglygep4 = getelementptr i8, ptr %lsr.iv3, i64 1 + %exitcond.not = icmp eq i64 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret ptr %call +} + +declare ptr @malloc(i64) + +declare ptr @strchr(ptr, i32 signext) + +declare ptr @__ctype_toupper_loc()