diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -4607,6 +4607,33 @@ static const char ID; }; +/// This attribute propagates hot/cold annotations. +struct AAHotCold : public StateWrapper { + using Base = StateWrapper; + + AAHotCold(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + + static AAHotCold &createForPosition(const IRPosition &IRP, Attributor &A); + + /// See AbstractAttribute::getName() + const std::string getName() const override { return "AAHotCold"; } + + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// If this function is assumed to be hot, return true. If assumed cold + /// return false. If we don't know return None. + virtual Optional isAssumedHot() const = 0; + + /// This function should return true if the type of the \p AA is AAHotCold. + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address) + static const char ID; +}; + raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &); /// Run options, used by the pass manager. diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -2481,6 +2481,9 @@ if (EnableHeapToStack) getOrCreateAAFor(FPos); + // Every function can be "hot/cold" + getOrCreateAAFor(FPos); + // Return attributes are only appropriate if the return type is non void. Type *ReturnType = F.getReturnType(); if (!ReturnType->isVoidTy()) { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -9524,6 +9524,92 @@ bool CanReachUnknownCallee = false; }; +struct AAHotColdFunction : public AAHotCold { + AAHotColdFunction(const IRPosition &IRP, Attributor &A) : AAHotCold(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + if (getIRPosition().hasAttr({Attribute::Cold})) { + IsHot = false; + indicateOptimisticFixpoint(); + return; + } + + if (getIRPosition().hasAttr({Attribute::Hot})) { + IsHot = true; + indicateOptimisticFixpoint(); + return; + } + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // We will use this to determine change. + Optional OldIsHot = IsHot; + + // If all of the callers of a function are cold, we can assume that the + // function is cold too. But we can't assume it is hot. + // TODO: Propagate accross calls inside the same block. + auto CallSiteCheck = [&](AbstractCallSite ACS) { + Function *Caller = ACS.getInstruction()->getFunction(); + const AAHotCold &AA = A.getAAFor( + *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); + + Optional AAAssumedHot = AA.isAssumedHot(); + if (!AAAssumedHot.hasValue()) + return true; + + // We can only distribute cold this way. + if (!IsHot.hasValue() && !AAAssumedHot.getValue()) + IsHot = AAAssumedHot; + else if (IsHot != AAAssumedHot) + // Conflicting hot cold input from callers. + // give up. + return false; + return true; + }; + + // If we don't know all call sites it is not possible for us to know + // if this function is hot or cold. + bool AllCallSitesKnown; + if (!A.checkForAllCallSites(CallSiteCheck, *this, true, + AllCallSitesKnown)) { + IsHot = llvm::None; + indicatePessimisticFixpoint(); + } + + return OldIsHot == IsHot ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; + } + + ChangeStatus manifest(Attributor &A) override { + if (!IsHot.hasValue()) + return ChangeStatus::UNCHANGED; + + LLVMContext &Ctx = getAssociatedFunction()->getContext(); + + const auto Attrs = {Attribute::get( + Ctx, IsHot.getValue() ? Attribute::Hot : Attribute::Cold)}; + return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(), Attrs); + } + + const std::string getAsStr() const override { + std::string State; + if (IsHot.hasValue()) + State = IsHot.getValue() ? "Hot" : "Cold"; + else + State = "Unknown"; + + return "HotCold[" + State + "]"; + } + + void trackStatistics() const override {} + + Optional isAssumedHot() const override { return IsHot; } + + /// true if the function is hot. false if it cold and None if we don't know. + Optional IsHot = llvm::None; +}; + } // namespace AACallGraphNode *AACallEdgeIterator::operator*() const { @@ -9559,6 +9645,7 @@ const char AACallEdges::ID = 0; const char AAFunctionReachability::ID = 0; const char AAPointerInfo::ID = 0; +const char AAHotCold::ID = 0; // Macro magic to create the static generator function for attributes that // follow the naming scheme. @@ -9681,6 +9768,7 @@ CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior) CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges) CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAFunctionReachability) +CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHotCold) CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior) diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -150,15 +150,15 @@ ; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0 ; IS__TUNIT_NPM-NEXT: store i32 1, i32* [[TMP1]], align 8 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8 -; IS__TUNIT_NPM-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8 -; IS__TUNIT_NPM-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR0]] ; IS__TUNIT_NPM-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32* -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 32 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST1]], align 8 ; IS__TUNIT_NPM-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 32 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_12]], align 8 +; IS__TUNIT_NPM-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR0]] +; IS__TUNIT_NPM-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32* +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST]], align 32 +; IS__TUNIT_NPM-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_1]], align 32 ; IS__TUNIT_NPM-NEXT: [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]]) #[[ATTR0]] ; IS__TUNIT_NPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; IS__TUNIT_NPM-NEXT: ret i32 [[A]] diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll @@ -159,15 +159,15 @@ ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions ; IS__TUNIT_NPM-SAME: () #[[ATTR0]] { ; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8* -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8 -; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8 -; IS__TUNIT_NPM-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]] ; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8* -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8 -; IS__TUNIT_NPM-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8 +; IS__TUNIT_NPM-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8 +; IS__TUNIT_NPM-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]] +; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8* +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8 +; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1 +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8 ; IS__TUNIT_NPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] ; IS__TUNIT_NPM-NEXT: ret i32 [[RESULT]] ; @@ -289,15 +289,15 @@ ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions_v2 ; IS__TUNIT_NPM-SAME: () #[[ATTR0]] { ; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8* -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8 -; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8 -; IS__TUNIT_NPM-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]] ; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8* -; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8 -; IS__TUNIT_NPM-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1 -; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8 +; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8 +; IS__TUNIT_NPM-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1 +; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8 +; IS__TUNIT_NPM-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]] +; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8* +; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8 +; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1 +; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8 ; IS__TUNIT_NPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR2]] ; IS__TUNIT_NPM-NEXT: ret i32 [[RESULT]] ; diff --git a/llvm/test/Transforms/Attributor/hotcold.ll b/llvm/test/Transforms/Attributor/hotcold.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Attributor/hotcold.ll @@ -0,0 +1,91 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM + +@G = global i32 0, align 4 + +;. +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i32 0, align 4 +;. +define internal void @test() #0 { +; IS__TUNIT____: Function Attrs: cold nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test +; IS__TUNIT____-SAME: () #[[ATTR0:[0-9]+]] { +; IS__TUNIT____-NEXT: store i32 1, i32* @G, align 4 +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: cold nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@test +; IS__CGSCC____-SAME: () #[[ATTR0:[0-9]+]] { +; IS__CGSCC____-NEXT: store i32 1, i32* @G, align 4 +; IS__CGSCC____-NEXT: ret void +; + store i32 1, i32* @G, align 4 + ret void +} + +define internal void @cold_caller1() #1 { +; IS__TUNIT____: Function Attrs: cold nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@cold_caller1 +; IS__TUNIT____-SAME: () #[[ATTR0]] { +; IS__TUNIT____-NEXT: call void @test() #[[ATTR1:[0-9]+]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: cold nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@cold_caller1 +; IS__CGSCC____-SAME: () #[[ATTR0]] { +; IS__CGSCC____-NEXT: call void @test() #[[ATTR2:[0-9]+]] +; IS__CGSCC____-NEXT: ret void +; + call void @test() + ret void +} + +define internal void @cold_caller2() #1 { +; IS__TUNIT____: Function Attrs: cold nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@cold_caller2 +; IS__TUNIT____-SAME: () #[[ATTR0]] { +; IS__TUNIT____-NEXT: call void @test() #[[ATTR1]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: cold nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@cold_caller2 +; IS__CGSCC____-SAME: () #[[ATTR0]] { +; IS__CGSCC____-NEXT: call void @test() #[[ATTR2]] +; IS__CGSCC____-NEXT: ret void +; + call void @test() + ret void +} + +define void @external() { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@external +; IS__TUNIT____-SAME: () #[[ATTR1]] { +; IS__TUNIT____-NEXT: call void @cold_caller2() #[[ATTR1]] +; IS__TUNIT____-NEXT: call void @cold_caller1() #[[ATTR1]] +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@external +; IS__CGSCC____-SAME: () #[[ATTR1:[0-9]+]] { +; IS__CGSCC____-NEXT: call void @cold_caller2() #[[ATTR2]] +; IS__CGSCC____-NEXT: call void @cold_caller1() #[[ATTR2]] +; IS__CGSCC____-NEXT: ret void +; + call void @cold_caller2() + call void @cold_caller1() + ret void +} + +attributes #1 = { cold } +;. +; IS__TUNIT____: attributes #[[ATTR0]] = { cold nofree nosync nounwind willreturn writeonly } +; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn writeonly } +;. +; IS__CGSCC____: attributes #[[ATTR0]] = { cold nofree norecurse nosync nounwind willreturn writeonly } +; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly } +; IS__CGSCC____: attributes #[[ATTR2]] = { nounwind willreturn writeonly } +;.