diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -4607,6 +4607,44 @@
   static const char ID;
 };
 
+/// Bit-encoded hot/cold state.
+///
+/// HOT and COLD are one bit each. UNKNOWN (both bits set) is the best state
+/// and INVALID (no bit set) is the worst state.
+struct HotColdState : public BitIntegerState<uint8_t, 3, 0> {
+  enum { INVALID = 0, HOT = 1 << 0, COLD = 1 << 1, UNKNOWN = HOT | COLD };
+  static_assert(UNKNOWN == getBestState(), "Unexpected best state");
+
+  /// Return true if the assumed state is exactly HOT.
+  bool isAssumedHot() const { return Assumed == HOT; }
+
+  /// Return true if the assumed state is exactly COLD.
+  bool isAssumedCold() const { return Assumed == COLD; }
+};
+
+/// This attribute propagates hot/cold annotations.
+struct AAHotCold : public StateWrapper<HotColdState, AbstractAttribute> {
+  using Base = StateWrapper<HotColdState, AbstractAttribute>;
+  AAHotCold(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAHotCold &createForPosition(const IRPosition &IRP, Attributor &A);
+
+  /// See AbstractAttribute::getName()
+  const std::string getName() const override { return "AAHotCold"; }
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is AAHotCold.
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
 raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);
 
 /// Run options, used by the pass manager.
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2481,6 +2481,9 @@
   if (EnableHeapToStack)
     getOrCreateAAFor<AAHeapToStack>(FPos);
 
+  // Every function can be hot or cold.
+  getOrCreateAAFor<AAHotCold>(FPos);
+
   // Return attributes are only appropriate if the return type is non void.
   Type *ReturnType = F.getReturnType();
   if (!ReturnType->isVoidTy()) {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -9524,6 +9524,100 @@
   bool CanReachUnknownCallee = false;
 };
 
+/// Hot/cold deduction for function positions.
+///
+/// A function that already carries `cold` or `hot` keeps that annotation.
+/// Otherwise its state is combined (`&=`) with the state of every function
+/// that calls it.
+struct AAHotColdFunction : public AAHotCold {
+  AAHotColdFunction(const IRPosition &IRP, Attributor &A) : AAHotCold(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // An existing annotation is recorded as known and the state is fixed.
+    // `cold` is checked first, so it wins if both attributes are present.
+    if (getIRPosition().hasAttr({Attribute::Cold})) {
+      addKnownBits(HotColdState::COLD);
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    if (getIRPosition().hasAttr({Attribute::Hot})) {
+      addKnownBits(HotColdState::HOT);
+      indicatePessimisticFixpoint();
+      return;
+    }
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    // We will use this to determine change.
+    HotColdState OldState = getState();
+
+    // If all of the callers of a function are cold, we can assume that the
+    // function is cold too. But we can't assume it is hot.
+    // NOTE(review): nothing below special-cases HOT, and manifest() emits
+    // `hot` for an assumed-hot state -- confirm that matches the sentence
+    // above.
+    // TODO: Propagate across calls inside the same block.
+    auto CallSiteCheck = [&](AbstractCallSite ACS) {
+      Function *Caller = ACS.getInstruction()->getFunction();
+      const AAHotCold &AA = A.getAAFor<AAHotCold>(
+          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+      *this &= AA.getState();
+      return true;
+    };
+
+    // If we don't know all call sites it is not possible for us to know
+    // if this function is hot or cold.
+    bool AllCallSitesKnown = false;
+    if (!A.checkForAllCallSites(CallSiteCheck, *this, true, AllCallSitesKnown))
+      indicatePessimisticFixpoint();
+
+    return OldState == getState() ? ChangeStatus::UNCHANGED
+                                  : ChangeStatus::CHANGED;
+  }
+
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    // Neither INVALID nor UNKNOWN names a single attribute to add.
+    if (getAssumed() == HotColdState::INVALID ||
+        getAssumed() == HotColdState::UNKNOWN)
+      return ChangeStatus::UNCHANGED;
+
+    LLVMContext &Ctx = getAssociatedFunction()->getContext();
+
+    const auto Attrs = {
+        Attribute::get(Ctx, isAssumedHot() ? Attribute::Hot : Attribute::Cold)};
+    return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(), Attrs);
+  }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    std::string State;
+
+    switch (getAssumed()) {
+    case HotColdState::UNKNOWN:
+      State = "Unknown";
+      break;
+    case HotColdState::INVALID:
+      State = "Invalid";
+      break;
+    case HotColdState::HOT:
+      State = "Hot";
+      break;
+    case HotColdState::COLD:
+      State = "Cold";
+      break;
+    }
+
+    return "HotCold[" + State + "]";
+  }
+
+  /// No statistics are tracked for this attribute.
+  void trackStatistics() const override {}
+};
+
 } // namespace
 
 AACallGraphNode *AACallEdgeIterator::operator*() const {
@@ -9559,6 +9653,7 @@
 const char AACallEdges::ID = 0;
 const char AAFunctionReachability::ID = 0;
 const char AAPointerInfo::ID = 0;
+const char AAHotCold::ID = 0;
 
 // Macro magic to create the static generator function for attributes that
 // follow the naming scheme.
@@ -9681,6 +9762,7 @@
 CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
 CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
 CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAFunctionReachability)
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHotCold)
 
 CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
@@ -150,15 +150,15 @@
 ; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 0
 ; IS__TUNIT_NPM-NEXT: store i32 1, i32* [[TMP1]], align 8
 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8
-; IS__TUNIT_NPM-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8
-; IS__TUNIT_NPM-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR0]]
 ; IS__TUNIT_NPM-NEXT: [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 32
+; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[S_CAST1]], align 8
 ; IS__TUNIT_NPM-NEXT: [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 32
+; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i64, i64* [[S_0_12]], align 8
+; IS__TUNIT_NPM-NEXT: [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR0]]
+; IS__TUNIT_NPM-NEXT: [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
+; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[S_CAST]], align 32
+; IS__TUNIT_NPM-NEXT: [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i64, i64* [[S_0_1]], align 32
 ; IS__TUNIT_NPM-NEXT: [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]]) #[[ATTR0]]
 ; IS__TUNIT_NPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]]
 ; IS__TUNIT_NPM-NEXT: ret i32 [[A]]
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -159,15 +159,15 @@
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions
 ; IS__TUNIT_NPM-SAME: () #[[ATTR0]] {
 ; IS__TUNIT_NPM-NEXT: entry:
-; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
-; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
-; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
-; IS__TUNIT_NPM-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
 ; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8
-; IS__TUNIT_NPM-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
-; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8
+; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8
+; IS__TUNIT_NPM-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8
+; IS__TUNIT_NPM-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
+; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
+; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
 ; IS__TUNIT_NPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT: ret i32 [[RESULT]]
 ;
@@ -289,15 +289,15 @@
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions_v2
 ; IS__TUNIT_NPM-SAME: () #[[ATTR0]] {
 ; IS__TUNIT_NPM-NEXT: entry:
-; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
-; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
-; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
-; IS__TUNIT_NPM-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
 ; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8*
-; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8
-; IS__TUNIT_NPM-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
-; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8
+; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8
+; IS__TUNIT_NPM-NEXT: [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__TUNIT_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8
+; IS__TUNIT_NPM-NEXT: call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
+; IS__TUNIT_NPM-NEXT: [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
+; IS__TUNIT_NPM-NEXT: [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
 ; IS__TUNIT_NPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR2]]
 ; IS__TUNIT_NPM-NEXT: ret i32 [[RESULT]]
 ;
diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll
--- a/llvm/test/Transforms/Attributor/depgraph.ll
+++ b/llvm/test/Transforms/Attributor/depgraph.ll
@@ -125,6 +125,8 @@
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAHeapToStack] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state [H2S] Mallocs Good/Bad: 0/0
 ; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAHotCold] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state HotCold[Unknown]
+; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state not-simple
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<1-16>
diff --git a/llvm/test/Transforms/Attributor/hotcold.ll b/llvm/test/Transforms/Attributor/hotcold.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/hotcold.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
+
+; Check that `cold` is propagated to @test, an internal function whose only
+; callers (@cold_caller1 and @cold_caller2) are both marked `cold`.
+
+@G = global i32 0, align 4
+
+;.
+; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i32 0, align 4
+;.
+define internal void @test() {
+; IS__TUNIT____: Function Attrs: cold nofree nosync nounwind willreturn writeonly
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test
+; IS__TUNIT____-SAME: () #[[ATTR0:[0-9]+]] {
+; IS__TUNIT____-NEXT: store i32 1, i32* @G, align 4
+; IS__TUNIT____-NEXT: ret void
+;
+; IS__CGSCC____: Function Attrs: cold nofree norecurse nosync nounwind willreturn writeonly
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test
+; IS__CGSCC____-SAME: () #[[ATTR0:[0-9]+]] {
+; IS__CGSCC____-NEXT: store i32 1, i32* @G, align 4
+; IS__CGSCC____-NEXT: ret void
+;
+  store i32 1, i32* @G, align 4
+  ret void
+}
+
+define internal void @cold_caller1() #1 {
+; IS__TUNIT____: Function Attrs: cold nofree nosync nounwind willreturn writeonly
+; IS__TUNIT____-LABEL: define {{[^@]+}}@cold_caller1
+; IS__TUNIT____-SAME: () #[[ATTR0]] {
+; IS__TUNIT____-NEXT: call void @test() #[[ATTR1:[0-9]+]]
+; IS__TUNIT____-NEXT: ret void
+;
+; IS__CGSCC____: Function Attrs: cold nofree norecurse nosync nounwind willreturn writeonly
+; IS__CGSCC____-LABEL: define {{[^@]+}}@cold_caller1
+; IS__CGSCC____-SAME: () #[[ATTR0]] {
+; IS__CGSCC____-NEXT: call void @test() #[[ATTR2:[0-9]+]]
+; IS__CGSCC____-NEXT: ret void
+;
+  call void @test()
+  ret void
+}
+
+define internal void @cold_caller2() #1 {
+; IS__TUNIT____: Function Attrs: cold nofree nosync nounwind willreturn writeonly
+; IS__TUNIT____-LABEL: define {{[^@]+}}@cold_caller2
+; IS__TUNIT____-SAME: () #[[ATTR0]] {
+; IS__TUNIT____-NEXT: call void @test() #[[ATTR1]]
+; IS__TUNIT____-NEXT: ret void
+;
+; IS__CGSCC____: Function Attrs: cold nofree norecurse nosync nounwind willreturn writeonly
+; IS__CGSCC____-LABEL: define {{[^@]+}}@cold_caller2
+; IS__CGSCC____-SAME: () #[[ATTR0]] {
+; IS__CGSCC____-NEXT: call void @test() #[[ATTR2]]
+; IS__CGSCC____-NEXT: ret void
+;
+  call void @test()
+  ret void
+}
+
+define void @external() {
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly
+; IS__TUNIT____-LABEL: define {{[^@]+}}@external
+; IS__TUNIT____-SAME: () #[[ATTR1]] {
+; IS__TUNIT____-NEXT: call void @cold_caller2() #[[ATTR1]]
+; IS__TUNIT____-NEXT: call void @cold_caller1() #[[ATTR1]]
+; IS__TUNIT____-NEXT: ret void
+;
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
+; IS__CGSCC____-LABEL: define {{[^@]+}}@external
+; IS__CGSCC____-SAME: () #[[ATTR1:[0-9]+]] {
+; IS__CGSCC____-NEXT: call void @cold_caller2() #[[ATTR2]]
+; IS__CGSCC____-NEXT: call void @cold_caller1() #[[ATTR2]]
+; IS__CGSCC____-NEXT: ret void
+;
+  call void @cold_caller2()
+  call void @cold_caller1()
+  ret void
+}
+
+attributes #1 = { cold }
+;.
+; IS__TUNIT____: attributes #[[ATTR0]] = { cold nofree nosync nounwind willreturn writeonly }
+; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn writeonly }
+;.
+; IS__CGSCC____: attributes #[[ATTR0]] = { cold nofree norecurse nosync nounwind willreturn writeonly }
+; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly }
+; IS__CGSCC____: attributes #[[ATTR2]] = { nounwind willreturn writeonly }
+;.