diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -951,9 +951,24 @@ /// attributes for inlining purposes. bool areInlineCompatible(const Function &Caller, const Function &Callee); + +/// Checks if there are any incompatible function attributes between +/// \p A and \p B. +/// +/// \param [in] A - The first function to be compared with. +/// \param [in] B - The second function to be compared with. +/// \returns true if the functions have compatible attributes. +bool areOutlineCompatible(const Function &A, const Function &B); + /// Merge caller's and callee's attributes. void mergeAttributesForInlining(Function &Caller, const Function &Callee); +/// Merges the function attributes from \p ToMerge into function \p Base. +/// +/// \param [in,out] Base - The function being merged into. +/// \param [in] ToMerge - The function to merge attributes from. +void mergeAttributesForOutlining(Function &Base, const Function &ToMerge); + } // end namespace AttributeFuncs } // end namespace llvm diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -2091,7 +2091,25 @@ return hasCompatibleFnAttrs(Caller, Callee); } +bool AttributeFuncs::areOutlineCompatible(const Function &A, + const Function &B) { + return hasCompatibleFnAttrs(A, B); +} + void AttributeFuncs::mergeAttributesForInlining(Function &Caller, const Function &Callee) { mergeFnAttrs(Caller, Callee); } + +void AttributeFuncs::mergeAttributesForOutlining(Function &Base, + const Function &ToMerge) { + + // We merge functions so that they meet the most general case. + // For example, if the NoNansFPMathAttr is set in one function, but not in + // the other, in the merged function we can say that the NoNansFPMathAttr + // is not set. 
+ // However, if we have the SpeculativeLoadHardeningAttr set to true in one + // function, but not the other, we make sure that the merged function + // retains that aspect. + mergeFnAttrs(Base, ToMerge); +} diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -1201,6 +1201,8 @@ for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) { CurrentOS = CurrentGroup.Regions[Idx]; + AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction, + *CurrentOS->ExtractedFunction); // Create a new BasicBlock to hold the needed store instructions. BasicBlock *NewBB = BasicBlock::Create( diff --git a/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll b/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-compatible-and-attribute-transfer.ll @@ -0,0 +1,126 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This has two compatible regions based on function attributes. We have +; attributes that should be transferred only if they are on all of the regions. + +; This includes the attributes no-nans-fp-math, +; no-signed-zeros-fp-math, less-precise-fpmad, unsafe-fp-math, and +; no-infs-fp-math. Only when each instance of similarity has these attributes +; can we say that the outlined function can have these attributes since that +; is the more general case. 
+ +define void @outline_attrs1() #0 { +; CHECK-LABEL: @outline_attrs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_attrs2() #0 { +; CHECK-LABEL: @outline_attrs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_1(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_attrs3() #0 { +; CHECK-LABEL: @outline_attrs3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca float, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca float, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca float, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(float* [[A]], float* [[B]], float* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca float, align 4 + %b = alloca float, align 4 + %c = alloca float, align 4 + store float 2.0, float* %a, align 4 + store float 3.0, float* %b, align 4 + store float 4.0, float* %c, align 4 + %al = load float, float* %a + %bl = load float, float* %b + %cl = load float, float* %c + %0 = fmul float %al, %bl + ret void +} + +define void @outline_attrs4() { +; CHECK-LABEL: @outline_attrs4( 
+; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca float, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca float, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca float, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(float* [[A]], float* [[B]], float* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca float, align 4 + %b = alloca float, align 4 + %c = alloca float, align 4 + store float 2.0, float* %a, align 4 + store float 3.0, float* %b, align 4 + store float 4.0, float* %c, align 4 + %al = load float, float* %a + %bl = load float, float* %b + %cl = load float, float* %c + %0 = fmul float %al, %bl + ret void +} + +attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "less-precise-fpmad"="true" +"unsafe-fp-math"="true" "no-infs-fp-math"="true"} + +; CHECK: define internal void @outlined_ir_func_0(float* [[ARG0:%.*]], float* [[ARG1:%.*]], float* [[ARG2:%.*]]) [[ATTR1:#[0-9]+]] { +; CHECK: entry_to_outline: +; CHECK-NEXT: store float 2.000000e+00, float* [[ARG0]], align 4 +; CHECK-NEXT: store float 3.000000e+00, float* [[ARG1]], align 4 +; CHECK-NEXT: store float 4.000000e+00, float* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load float, float* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load float, float* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load float, float* [[ARG2]], align 4 + +; CHECK: define internal void @outlined_ir_func_1(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) [[ATTR:#[0-9]+]] { +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 + + +; CHECK: attributes [[ATTR1]] = { minsize optsize "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "unsafe-fp-math"="false" } +; CHECK: attributes [[ATTR]] = { minsize optsize "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "unsafe-fp-math"="true" } diff --git a/llvm/test/Transforms/IROutliner/outlining-compatible-or-attribute-transfer.ll b/llvm/test/Transforms/IROutliner/outlining-compatible-or-attribute-transfer.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-compatible-or-attribute-transfer.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < "%s" | FileCheck "%s" + +; This has two compatible regions. We have attributes that should be transferred +; even if they are on only one of the regions. + +; This includes the attributes no-jump-tables, profile-sample-accurate, +; speculative_load_hardening, and noimplicitfloat. When any instance of +; similarity has these attributes, we can say that the outlined function can +; have these attributes since that is the more general case. 
+ +define void @outline_attrs1() #0 { +; CHECK-LABEL: @outline_attrs1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_attrs2() { +; CHECK-LABEL: @outline_attrs2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +attributes #0 = { "no-jump-tables"="true" "profile-sample-accurate"="true" "speculative_load_hardening" "noimplicitfloat"="true" "use-sample-profile"="true"} + +; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]]) [[ATTR:#[0-9]+]] { +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 + +; CHECK: attributes [[ATTR]] = { minsize optsize "no-jump-tables"="true" "noimplicitfloat"="true" 
"profile-sample-accurate"="true" "speculative_load_hardening" "use-sample-profile"="true" }