diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -35,6 +35,7 @@ #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/SizeOpts.h" +#include <cmath> using namespace llvm; @@ -269,8 +270,8 @@ Cost += getUserBonus(User, TTI, LI); // Increase the cost if it is inside the loop. - auto LoopDepth = LI.getLoopDepth(I->getParent()) + 1; - Cost *= (AvgLoopIterationCount ^ LoopDepth); + auto LoopDepth = LI.getLoopDepth(I->getParent()); + Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth); return Cost; } diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-loop.ll @@ -0,0 +1,63 @@ +; RUN: opt -function-specialization -func-specialization-avg-iters-cost=3 -S < %s | FileCheck %s + +; Check that the loop depth results in a larger specialization bonus. 
+; CHECK: @foo.1( +; CHECK: @foo.2( + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + +@A = external dso_local constant i32, align 4 +@B = external dso_local constant i32, align 4 +@C = external dso_local constant i32, align 4 +@D = external dso_local constant i32, align 4 + +declare i1 @cond_begin() +declare i1 @cond_end() +declare i1 @getCond() + +define internal i32 @foo(i32 %x, i32* %b, i32* %c) { +entry: + br label %loop.entry + +loop.entry: + br label %loop2.entry + +loop2.entry: + br label %loop2.body + +loop2.body: + %0 = load i32, i32* %b, align 4 + %1 = load i32, i32* %c, align 4 + %add.0 = add nsw i32 %0, %1 + %add = add nsw i32 %add.0, %x + br label %loop2.end + +loop2.end: + %cond.end = call i1 @cond_end() + br i1 %cond.end, label %loop2.entry, label %loop.end + +loop.end: + %cond2.end = call i1 @getCond() + br i1 %cond2.end, label %loop.entry, label %return + +return: + ret i32 %add +} + +define dso_local i32 @bar(i32 %x, i32 %y) { +entry: + %tobool = icmp ne i32 %x, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: + %call = call i32 @foo(i32 %x, i32* @A, i32* @C) + br label %return + +if.else: + %call1 = call i32 @foo(i32 %y, i32* @B, i32* @D) + br label %return + +return: + %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ] + ret i32 %retval.0 +} \ No newline at end of file diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-recursive.ll @@ -39,14 +39,7 @@ define i32 @main() { ; CHECK-LABEL: @main( -; CHECK-NEXT: [[TEMP_I:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TEMP_I]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP1]]) -; CHECK-NEXT: call void 
@print_val(i32 1) -; CHECK-NEXT: store i32 2, i32* [[TEMP_I]], align 4 -; CHECK-NEXT: call void @recursiveFunc(i32* nonnull [[TEMP_I]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[TEMP_I]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP2]]) +; CHECK-NEXT: call void @recursiveFunc(i32* nonnull @Global) ; CHECK-NEXT: ret i32 0 ; call void @recursiveFunc(i32* nonnull @Global) diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization3.ll @@ -1,7 +1,5 @@ ; RUN: opt -function-specialization -func-specialization-avg-iters-cost=3 -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,DISABLED -; RUN: opt -function-specialization -func-specialization-avg-iters-cost=4 -S < %s | \ -; RUN: FileCheck %s --check-prefixes=COMMON,FORCE ; RUN: opt -function-specialization -force-function-specialization -S < %s | \ ; RUN: FileCheck %s --check-prefixes=COMMON,FORCE ; RUN: opt -function-specialization -func-specialization-avg-iters-cost=3 -force-function-specialization -S < %s | \