Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1746,13 +1746,15 @@ } } + UP.Runtime = true; + UP.Force = true; + // Enable runtime unrolling for in-order models // If mcpu is omitted, getProcFamily() returns AArch64Subtarget::Others, so by // checking for that case, we can ensure that the default behaviour is // unchanged if (ST->getProcFamily() != AArch64Subtarget::Others && !ST->getSchedModel().isOutOfOrder()) { - UP.Runtime = true; UP.Partial = true; UP.UpperBound = true; UP.UnrollRemainder = true; Index: llvm/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll +++ llvm/test/Transforms/LoopUnroll/AArch64/full-unroll-trip-count-upper-bound.ll @@ -1,5 +1,4 @@ ; RUN: opt -loop-unroll -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=UNROLL -; RUN: opt -loop-unroll -unroll-max-upperbound=0 -S -mtriple aarch64 -mcpu=cortex-a57 %s | FileCheck %s -check-prefix=NOUNROLL ; This IR comes from this C code: ; @@ -19,9 +18,6 @@ ; UNROLL: load i32, i32* ; UNROLL: load i32, i32* ; UNROLL-NOT: load i32, i32* -; NOUNROLL-LABEL: @test -; NOUNROLL: load i32, i32* -; NOUNROLL-NOT: load i32, i32* define void @test(i32* %dst, i32* %src) { entry: Index: llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll +++ llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll @@ -97,20 +97,93 @@ ; CHECK-GENERIC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 -; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]] +; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]] +; CHECK-GENERIC: for.body6.preheader: +; CHECK-GENERIC-NEXT: [[TMP0:%.*]] = add i32 [[ARG_0]], -1 +; CHECK-GENERIC-NEXT: [[XTRAITER:%.*]] = and i32 [[ARG_0]], 7 +; CHECK-GENERIC-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 7 +; CHECK-GENERIC-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_PREHEADER_NEW:%.*]] +; CHECK-GENERIC: for.body6.preheader.new: +; CHECK-GENERIC-NEXT: [[UNROLL_ITER:%.*]] = and i32 [[ARG_0]], -8 +; CHECK-GENERIC-NEXT: br label [[FOR_BODY6:%.*]] ; CHECK-GENERIC: for.body6: -; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY6]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-GENERIC-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 -; CHECK-GENERIC-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -; CHECK-GENERIC-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 -; CHECK-GENERIC-NEXT: [[CONV15:%.*]] = sext i16 [[TMP1]] to i32 +; CHECK-GENERIC-NEXT: [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_BODY6_PREHEADER_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY6]] ] +; CHECK-GENERIC-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-GENERIC-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +; CHECK-GENERIC-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-GENERIC-NEXT: [[CONV15:%.*]] = sext i16 [[TMP3]] to i32 ; CHECK-GENERIC-NEXT: [[MUL16:%.*]] = mul nsw i32 [[CONV15]], [[CONV]] -; CHECK-GENERIC-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 -; CHECK-GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP2]] +; CHECK-GENERIC-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP4]] ; CHECK-GENERIC-NEXT: store i32 [[ADD21]], i32* [[ARRAYIDX20]], align 4 -; CHECK-GENERIC-NEXT: [[INC]] = add nuw i32 [[K_03]], 1 -; CHECK-GENERIC-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC]], [[ARG_0]] -; CHECK-GENERIC-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-GENERIC-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-GENERIC-NEXT: [[CONV_1:%.*]] = sext i16 [[TMP5]] to i32 +; CHECK-GENERIC-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-GENERIC-NEXT: [[CONV15_1:%.*]] = sext i16 [[TMP6]] to i32 +; CHECK-GENERIC-NEXT: [[MUL16_1:%.*]] = mul nsw i32 [[CONV15_1]], [[CONV_1]] +; CHECK-GENERIC-NEXT: [[ADD21_1:%.*]] = add nsw i32 [[MUL16_1]], [[ADD21]] +; CHECK-GENERIC-NEXT: store i32 [[ADD21_1]], i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-GENERIC-NEXT: [[CONV_2:%.*]] = sext i16 [[TMP7]] to i32 +; CHECK-GENERIC-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-GENERIC-NEXT: [[CONV15_2:%.*]] = sext i16 [[TMP8]] to i32 +; CHECK-GENERIC-NEXT: [[MUL16_2:%.*]] = mul nsw i32 [[CONV15_2]], [[CONV_2]] +; CHECK-GENERIC-NEXT: [[ADD21_2:%.*]] = add nsw i32 [[MUL16_2]], [[ADD21_1]] +; CHECK-GENERIC-NEXT: store i32 [[ADD21_2]], i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-GENERIC-NEXT: [[CONV_3:%.*]] = sext i16 [[TMP9]] to i32 +; CHECK-GENERIC-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-GENERIC-NEXT: [[CONV15_3:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-GENERIC-NEXT: [[MUL16_3:%.*]] = mul nsw i32 [[CONV15_3]], [[CONV_3]] +; CHECK-GENERIC-NEXT: [[ADD21_3:%.*]] = add nsw i32 [[MUL16_3]], [[ADD21_2]] +; CHECK-GENERIC-NEXT: store i32 [[ADD21_3]], i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-GENERIC-NEXT: [[CONV_4:%.*]] = sext i16 [[TMP11]] to i32 +; CHECK-GENERIC-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-GENERIC-NEXT: [[CONV15_4:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-GENERIC-NEXT: [[MUL16_4:%.*]] = mul nsw i32 [[CONV15_4]], [[CONV_4]] +; CHECK-GENERIC-NEXT: [[ADD21_4:%.*]] = add nsw i32 [[MUL16_4]], [[ADD21_3]] +; CHECK-GENERIC-NEXT: store i32 [[ADD21_4]], i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-GENERIC-NEXT: [[CONV_5:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-GENERIC-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-GENERIC-NEXT: [[CONV15_5:%.*]] = sext i16 [[TMP14]] to i32 +; CHECK-GENERIC-NEXT: [[MUL16_5:%.*]] = mul nsw i32 [[CONV15_5]], [[CONV_5]] +; CHECK-GENERIC-NEXT: [[ADD21_5:%.*]] = add nsw i32 [[MUL16_5]], [[ADD21_4]] +; CHECK-GENERIC-NEXT: store i32 [[ADD21_5]], i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-GENERIC-NEXT: [[CONV_6:%.*]] = sext i16 [[TMP15]] to i32 +; CHECK-GENERIC-NEXT: [[TMP16:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-GENERIC-NEXT: [[CONV15_6:%.*]] = sext i16 [[TMP16]] to i32 +; CHECK-GENERIC-NEXT: [[MUL16_6:%.*]] = mul nsw i32 [[CONV15_6]], [[CONV_6]] +; CHECK-GENERIC-NEXT: [[ADD21_6:%.*]] = add nsw i32 [[MUL16_6]], [[ADD21_5]] +; CHECK-GENERIC-NEXT: store i32 [[ADD21_6]], i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[TMP17:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-GENERIC-NEXT: [[CONV_7:%.*]] = sext i16 [[TMP17]] to i32 +; CHECK-GENERIC-NEXT: [[TMP18:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-GENERIC-NEXT: [[CONV15_7:%.*]] = sext i16 [[TMP18]] to i32 +; CHECK-GENERIC-NEXT: [[MUL16_7:%.*]] = mul nsw i32 [[CONV15_7]], [[CONV_7]] +; CHECK-GENERIC-NEXT: [[ADD21_7:%.*]] = add nsw i32 [[MUL16_7]], [[ADD21_6]] +; CHECK-GENERIC-NEXT: store i32 [[ADD21_7]], i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[NITER_NSUB_7]] = add i32 [[NITER]], -8 +; CHECK-GENERIC-NEXT: [[NITER_NCMP_7_NOT:%.*]] = icmp eq i32 [[NITER_NSUB_7]], 0 +; CHECK-GENERIC-NEXT: br i1 [[NITER_NCMP_7_NOT]], label [[FOR_END_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY6]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-GENERIC: for.end.loopexit.unr-lcssa: +; CHECK-GENERIC-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i32 [[XTRAITER]], 0 +; CHECK-GENERIC-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL:%.*]] +; CHECK-GENERIC: for.body6.epil: +; CHECK-GENERIC-NEXT: [[EPIL_ITER:%.*]] = phi i32 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY6_EPIL]] ], [ [[XTRAITER]], [[FOR_END_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-GENERIC-NEXT: [[TMP19:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 +; CHECK-GENERIC-NEXT: [[CONV_EPIL:%.*]] = sext i16 [[TMP19]] to i32 +; CHECK-GENERIC-NEXT: [[TMP20:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 +; CHECK-GENERIC-NEXT: [[CONV15_EPIL:%.*]] = sext i16 [[TMP20]] to i32 +; CHECK-GENERIC-NEXT: [[MUL16_EPIL:%.*]] = mul nsw i32 [[CONV15_EPIL]], [[CONV_EPIL]] +; CHECK-GENERIC-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[ADD21_EPIL:%.*]] = add nsw i32 [[MUL16_EPIL]], [[TMP21]] +; CHECK-GENERIC-NEXT: store i32 [[ADD21_EPIL]], i32* [[ARRAYIDX20]], align 4 +; CHECK-GENERIC-NEXT: [[EPIL_ITER_SUB]] = add i32 [[EPIL_ITER]], -1 +; CHECK-GENERIC-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i32 [[EPIL_ITER_SUB]], 0 +; CHECK-GENERIC-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_END]], label [[FOR_BODY6_EPIL]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-GENERIC: for.end: ; CHECK-GENERIC-NEXT: ret void ; Index: llvm/test/Transforms/LoopUnroll/AArch64/unroll-loop-with-pointer-type-iv.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/AArch64/unroll-loop-with-pointer-type-iv.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes=loop-unroll | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +declare i32 @foo(i8 *) + +define void @test(i8* %s, i64 %a) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S_ADDR_A:%.*]] = getelementptr i8, i8* [[S:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: br label [[WHILE_BODY_US:%.*]] +; CHECK: while.body.us: +; CHECK-NEXT: [[S_ADDR:%.*]] = phi i8* [ [[S]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR_7:%.*]], [[WHILE_BODY_US_7:%.*]] ] +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* [[S_ADDR]], i64 1 +; CHECK-NEXT: [[INCDEC_VAL:%.*]] = load i8, i8* [[S_ADDR]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[INCDEC_VAL]], 10 +; CHECK-NEXT: br i1 [[CMP1]], label [[RETURN_LOOPEXIT:%.*]], label [[WHILE_BODY_US_1:%.*]] +; CHECK: return.loopexit: +; CHECK-NEXT: ret void +; CHECK: while.body.us.1: +; CHECK-NEXT: [[INCDEC_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[INCDEC_PTR]], i64 1 +; CHECK-NEXT: [[INCDEC_VAL_1:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1 +; CHECK-NEXT: [[CMP1_1:%.*]] = icmp eq i8 [[INCDEC_VAL_1]], 10 +; CHECK-NEXT: br i1 [[CMP1_1]], label [[RETURN_LOOPEXIT]], label [[WHILE_BODY_US_2:%.*]] +; CHECK: while.body.us.2: +; CHECK-NEXT: [[INCDEC_PTR_2:%.*]] = getelementptr inbounds i8, i8* [[INCDEC_PTR_1]], i64 1 +; CHECK-NEXT: [[INCDEC_VAL_2:%.*]] = load i8, i8* [[INCDEC_PTR_1]], align 1 +; CHECK-NEXT: [[CMP1_2:%.*]] = icmp eq i8 [[INCDEC_VAL_2]], 10 +; CHECK-NEXT: br i1 [[CMP1_2]], label [[RETURN_LOOPEXIT]], label [[WHILE_BODY_US_3:%.*]] +; CHECK: while.body.us.3: +; CHECK-NEXT: [[INCDEC_PTR_3:%.*]] = getelementptr inbounds i8, i8* [[INCDEC_PTR_2]], i64 1 +; CHECK-NEXT: [[INCDEC_VAL_3:%.*]] = load i8, i8* [[INCDEC_PTR_2]], align 1 +; CHECK-NEXT: [[CMP1_3:%.*]] = icmp eq i8 [[INCDEC_VAL_3]], 10 +; CHECK-NEXT: br i1 [[CMP1_3]], label [[RETURN_LOOPEXIT]], label [[WHILE_BODY_US_4:%.*]] +; CHECK: while.body.us.4: +; CHECK-NEXT: [[INCDEC_PTR_4:%.*]] = getelementptr inbounds i8, i8* [[INCDEC_PTR_3]], i64 1 +; CHECK-NEXT: [[INCDEC_VAL_4:%.*]] = load i8, i8* [[INCDEC_PTR_3]], align 1 +; CHECK-NEXT: [[CMP1_4:%.*]] = icmp eq i8 [[INCDEC_VAL_4]], 10 +; CHECK-NEXT: br i1 [[CMP1_4]], label [[RETURN_LOOPEXIT]], label [[WHILE_BODY_US_5:%.*]] +; CHECK: while.body.us.5: +; CHECK-NEXT: [[INCDEC_PTR_5:%.*]] = getelementptr inbounds i8, i8* [[INCDEC_PTR_4]], i64 1 +; CHECK-NEXT: [[INCDEC_VAL_5:%.*]] = load i8, i8* [[INCDEC_PTR_4]], align 1 +; CHECK-NEXT: [[CMP1_5:%.*]] = icmp eq i8 [[INCDEC_VAL_5]], 10 +; CHECK-NEXT: br i1 [[CMP1_5]], label [[RETURN_LOOPEXIT]], label [[WHILE_BODY_US_6:%.*]] +; CHECK: while.body.us.6: +; CHECK-NEXT: [[INCDEC_PTR_6:%.*]] = getelementptr inbounds i8, i8* [[INCDEC_PTR_5]], i64 1 +; CHECK-NEXT: [[INCDEC_VAL_6:%.*]] = load i8, i8* [[INCDEC_PTR_5]], align 1 +; CHECK-NEXT: [[CMP1_6:%.*]] = icmp eq i8 [[INCDEC_VAL_6]], 10 +; CHECK-NEXT: br i1 [[CMP1_6]], label [[RETURN_LOOPEXIT]], label [[WHILE_BODY_US_7]] +; CHECK: while.body.us.7: +; CHECK-NEXT: [[INCDEC_PTR_7]] = getelementptr inbounds i8, i8* [[INCDEC_PTR_6]], i64 1 +; CHECK-NEXT: [[INCDEC_VAL_7:%.*]] = load i8, i8* [[INCDEC_PTR_6]], align 1 +; CHECK-NEXT: [[CMP1_7:%.*]] = icmp eq i8 [[INCDEC_VAL_7]], 10 +; CHECK-NEXT: br i1 [[CMP1_7]], label [[RETURN_LOOPEXIT]], label [[WHILE_BODY_US]] +; +entry: + %s.addr.a = getelementptr i8, i8* %s, i64 %a + br label %while.body.us + +while.body.us: + %s.addr = phi i8* [ %incdec.ptr, %while.body.us ], [ %s, %entry ] + %incdec.ptr = getelementptr inbounds i8, i8* %s.addr, i64 1 + %incdec.val = load i8, i8* %s.addr, align 1 + %cmp1 = icmp eq i8 %incdec.val, 10 + br i1 %cmp1, label %return.loopexit, label %while.body.us + +return.loopexit: + ret void +} +