diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8229,23 +8229,26 @@ // Get the trip count const SCEV *TCExpr = getTripCountFromExitCount(ExitCount); + // If a trip multiple is huge (>=2^32), the trip count is still divisible by + // the greatest power of 2 divisor less than 2^32. + auto GetSmallMultiple = [](unsigned TrailingZeros) { + return 1U << std::min((uint32_t)31, TrailingZeros); + }; + const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr); if (!TC) // Attempt to factor more general cases. Returns the greatest power of - // two divisor. If overflow happens, the trip count expression is still - // divisible by the greatest power of 2 divisor returned. - return 1U << std::min((uint32_t)31, - GetMinTrailingZeros(applyLoopGuards(TCExpr, L))); + // two divisor. + return GetSmallMultiple( + GetMinTrailingZeros(applyLoopGuards(TCExpr, L))); ConstantInt *Result = TC->getValue(); - - // Guard against huge trip counts (this requires checking - // for zero to handle the case where the trip count == -1 and the - // addition wraps). assert(Result && "SCEVConstant expected to have non-null ConstantInt"); - if (Result->getValue().getActiveBits() > 32 || - Result->getValue().getActiveBits() == 0) - return 1; + assert(Result->getValue() != 0 && "trip count should never be zero"); + + // Guard against huge trip multiples. 
+ if (Result->getValue().getActiveBits() > 32) + return GetSmallMultiple(Result->getValue().countTrailingZeros()); return (unsigned)Result->getZExtValue(); } diff --git a/llvm/test/Analysis/ScalarEvolution/huge-trip-multiple.ll b/llvm/test/Analysis/ScalarEvolution/huge-trip-multiple.ll --- a/llvm/test/Analysis/ScalarEvolution/huge-trip-multiple.ll +++ b/llvm/test/Analysis/ScalarEvolution/huge-trip-multiple.ll @@ -1,7 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2 ; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s -; Tests loops with huge trip counts. Trip count of >=2**32 are huge. Huge trip counts have a trip multiple of 1. +; Tests loops with huge trip counts. Trip counts of >=2^32 are huge. A huge trip count has a trip multiple +; equal to its greatest power of 2 divisor less than 2^32. declare void @foo(...) @@ -47,7 +48,7 @@ ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 4294967295 ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 4294967295 ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 1 +; CHECK: Loop %for.body: Trip multiple is 2147483648 ; entry: br label %for.body @@ -76,7 +77,7 @@ ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 8589934591 ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 8589934591 ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 1 +; CHECK: Loop %for.body: Trip multiple is 2147483648 ; entry: br label %for.body @@ -105,7 +106,7 @@ ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 9223372036854775807 ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 9223372036854775807 ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 1 +; CHECK: Loop %for.body: Trip multiple is 2147483648 ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopUnroll/X86/mmx.ll b/llvm/test/Transforms/LoopUnroll/X86/mmx.ll --- 
a/llvm/test/Transforms/LoopUnroll/X86/mmx.ll +++ b/llvm/test/Transforms/LoopUnroll/X86/mmx.ll @@ -7,53 +7,21 @@ ; CHECK-LABEL: define x86_mmx @f ; CHECK-SAME: () #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 false, label [[EXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] -; CHECK: entry.new: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 1, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[PHI]], 1 -; CHECK-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1 ; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i32 [[ADD]], 1 -; CHECK-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1 ; CHECK-NEXT: [[ADD_2:%.*]] = add nuw nsw i32 [[ADD_1]], 1 -; CHECK-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1 ; CHECK-NEXT: [[ADD_3:%.*]] = add nuw nsw i32 [[ADD_2]], 1 -; CHECK-NEXT: [[NITER_NEXT_3:%.*]] = add nuw nsw i32 [[NITER_NEXT_2]], 1 ; CHECK-NEXT: [[ADD_4:%.*]] = add nuw nsw i32 [[ADD_3]], 1 -; CHECK-NEXT: [[NITER_NEXT_4:%.*]] = add nuw nsw i32 [[NITER_NEXT_3]], 1 ; CHECK-NEXT: [[ADD_5:%.*]] = add nuw nsw i32 [[ADD_4]], 1 -; CHECK-NEXT: [[NITER_NEXT_5:%.*]] = add nuw nsw i32 [[NITER_NEXT_4]], 1 ; CHECK-NEXT: [[ADD_6:%.*]] = add i32 [[ADD_5]], 1 -; CHECK-NEXT: [[NITER_NEXT_6:%.*]] = add nuw nsw i32 [[NITER_NEXT_5]], 1 ; CHECK-NEXT: [[ADD_7]] = add nuw nsw i32 [[ADD_6]], 1 -; CHECK-NEXT: [[NITER_NEXT_7]] = add i32 [[NITER_NEXT_6]], 1 -; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i32 [[NITER_NEXT_7]], 0 -; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[EXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_BODY]] -; CHECK: exit.unr-lcssa.loopexit: -; CHECK-NEXT: [[RET_PH_PH:%.*]] = phi x86_mmx [ undef, [[FOR_BODY]] ] -; CHECK-NEXT: [[PHI_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], 
[[FOR_BODY]] ] -; CHECK-NEXT: br label [[EXIT_UNR_LCSSA]] -; CHECK: exit.unr-lcssa: -; CHECK-NEXT: [[RET_PH:%.*]] = phi x86_mmx [ undef, [[ENTRY:%.*]] ], [ [[RET_PH_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[PHI_UNR:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[PHI_UNR_PH]], [[EXIT_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: br i1 false, label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[EXIT:%.*]] -; CHECK: for.body.epil.preheader: -; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]] -; CHECK: for.body.epil: -; CHECK-NEXT: [[PHI_EPIL:%.*]] = phi i32 [ [[PHI_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ], [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ] -; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i32 [ 0, [[FOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ] -; CHECK-NEXT: [[ADD_EPIL]] = add i32 [[PHI_EPIL]], 1 -; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i32 [[PHI_EPIL]], 0 -; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i32 [[EPIL_ITER]], 1 -; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_NEXT]], 0 -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL]], label [[EXIT_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: exit.epilog-lcssa: -; CHECK-NEXT: [[RET_PH1:%.*]] = phi x86_mmx [ undef, [[FOR_BODY_EPIL]] ] -; CHECK-NEXT: br label [[EXIT]] +; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i32 [[ADD_6]], 0 +; CHECK-NEXT: br i1 [[CMP_7]], label [[EXIT:%.*]], label [[FOR_BODY]] ; CHECK: exit: -; CHECK-NEXT: [[RET:%.*]] = phi x86_mmx [ [[RET_PH]], [[EXIT_UNR_LCSSA]] ], [ [[RET_PH1]], [[EXIT_EPILOG_LCSSA]] ] +; CHECK-NEXT: [[RET:%.*]] = phi x86_mmx [ undef, [[FOR_BODY]] ] ; CHECK-NEXT: ret x86_mmx [[RET]] ; entry: