diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp --- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -2199,20 +2199,27 @@ // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or // HowManyLessThans produced to compute a precise expression, rather than a // UDiv from the user's code. If we can't find a UDiv in the code with some - // simple searching, assume the former consider UDivExpr expensive to - // compute. - BasicBlock *ExitingBB = L->getExitingBlock(); - if (!ExitingBB) - return true; + // simple searching, we need to account for it's cost. - // At the beginning of this function we already tried to find existing value - // for plain 'S'. Now try to lookup 'S + 1' since it is common pattern - // involving division. This is just a simple search heuristic. - if (!At) - At = &ExitingBB->back(); - if (!getRelatedExistingExpansion( - SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L)) - return true; + BasicBlock *ExitingBB = L->getExitingBlock(); + if (At || ExitingBB) { + if (!At) + At = &ExitingBB->back(); + + // At the beginning of this function we already tried to find existing + // value for plain 'S'. Now try to lookup 'S + 1' since it is common + // pattern involving division. This is just a simple search heuristic. + if (getRelatedExistingExpansion( + SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L)) + return false; // Consider it to be free. + } + + // Need to count the cost of this UDiv. + BudgetRemaining -= TTI->getOperationCost(Instruction::UDiv, S->getType()); + return isHighCostExpansionHelper(UDivExpr->getLHS(), L, At, BudgetRemaining, + TTI, Processed) || + isHighCostExpansionHelper(UDivExpr->getRHS(), L, At, BudgetRemaining, + TTI, Processed); } // HowManyLessThans uses a Max expression whenever the loop is not guarded by diff --git a/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll b/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll --- a/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll +++ b/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll @@ -13,22 +13,25 @@ ; CHECK-LABEL: @_Z3fooPKcjj( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP]]) +; CHECK-NEXT: [[T:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[T]]) ; CHECK-NEXT: store i32 -1640531527, i32* [[A]], align 4 ; CHECK-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[LEN:%.*]], 11 ; CHECK-NEXT: br i1 [[CMP8]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.body.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -12 +; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 12 ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[KEYLEN_010:%.*]] = phi i32 [ [[LEN]], [[WHILE_BODY_LR_PH]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ] ; CHECK-NEXT: [[S_ADDR_09:%.*]] = phi i8* [ [[S:%.*]], [[WHILE_BODY_LR_PH]] ], [ [[ADD_PTR:%.*]], [[WHILE_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[S_ADDR_09]] to i32* -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -; CHECK-NEXT: [[SHL_I:%.*]] = shl i32 [[TMP2]], 1 +; CHECK-NEXT: [[T1:%.*]] = bitcast i8* [[S_ADDR_09]] to i32* +; CHECK-NEXT: [[T2:%.*]] = load i32, i32* [[T1]], align 4 +; CHECK-NEXT: [[SHL_I:%.*]] = shl i32 [[T2]], 1 ; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHL_I]], 16843008 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 -; CHECK-NEXT: [[SUB_I:%.*]] = add i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[T3:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[SUB_I:%.*]] = add i32 [[T3]], [[T2]] ; CHECK-NEXT: [[ADD:%.*]] = sub i32 [[SUB_I]], [[AND_I]] ; CHECK-NEXT: store i32 [[ADD]], i32* [[A]], align 4 ; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[S_ADDR_09]], i64 12 @@ -36,19 +39,19 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SUB]], 11 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]] ; CHECK: while.cond.while.end_crit_edge: -; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]] ; CHECK-NEXT: br label [[WHILE_END]] ; CHECK: while.end: -; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[SUB_LCSSA]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ] ; CHECK-NEXT: call void @_Z3mixRjj(i32* dereferenceable(4) [[A]], i32 [[KEYLEN_0_LCSSA]]) -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP]]) -; CHECK-NEXT: ret i32 [[TMP4]] +; CHECK-NEXT: [[T4:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[T]]) +; CHECK-NEXT: ret i32 [[T4]] ; entry: %a = alloca i32, align 4 - %tmp = bitcast i32* %a to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* %tmp) + %t = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %t) store i32 -1640531527, i32* %a, align 4 %cmp8 = icmp ugt i32 %len, 11 br i1 %cmp8, label %while.body.lr.ph, label %while.end @@ -59,12 +62,12 @@ while.body: ; preds = %while.body, %while.body.lr.ph %keylen.010 = phi i32 [ %len, %while.body.lr.ph ], [ %sub, %while.body ] %s.addr.09 = phi i8* [ %s, %while.body.lr.ph ], [ %add.ptr, %while.body ] - %tmp1 = bitcast i8* %s.addr.09 to i32* - %tmp2 = load i32, i32* %tmp1, align 4 - %shl.i = shl i32 %tmp2, 1 + %t1 = bitcast i8* %s.addr.09 to i32* + %t2 = load i32, i32* %t1, align 4 + %shl.i = shl i32 %t2, 1 %and.i = and i32 %shl.i, 16843008 - %tmp3 = load i32, i32* %a, align 4 - %sub.i = add i32 %tmp3, %tmp2 + %t3 = load i32, i32* %a, align 4 + %sub.i = add i32 %t3, %t2 %add = sub i32 %sub.i, %and.i store i32 %add, i32* %a, align 4 %add.ptr = getelementptr inbounds i8, i8* %s.addr.09, i64 12 @@ -79,9 +82,9 @@ while.end: ; preds = %while.cond.while.end_crit_edge, %entry %keylen.0.lcssa = phi i32 [ %sub.lcssa, %while.cond.while.end_crit_edge ], [ %len, %entry ] call void @_Z3mixRjj(i32* dereferenceable(4) %a, i32 %keylen.0.lcssa) - %tmp4 = load i32, i32* %a, align 4 - call void @llvm.lifetime.end.p0i8(i64 4, i8* %tmp) - ret i32 %tmp4 + %t4 = load i32, i32* %a, align 4 + call void @llvm.lifetime.end.p0i8(i64 4, i8* %t) + ret i32 %t4 } define i32 @zero_backedge_count_test(i32 %unknown_init, i32* %unknown_mem) {