Index: docs/ClangCommandLineReference.rst =================================================================== --- docs/ClangCommandLineReference.rst +++ docs/ClangCommandLineReference.rst @@ -1950,6 +1950,10 @@ Use unique names for text and data sections (ELF Only) +.. option:: -freorder-functions, -fno-reorder-functions + +Enable function reordering for better code locality. When enabled, hot/cold functions will be placed under .text.hot/.text.unlikely subsection in the final binary, respectively. If -ffunction-section is provided, this will append .text.hot/.text.unlikely prefix to the section name. This is turned on by default, but only effective when profile data is provided. + .. option:: -funit-at-a-time, -fno-unit-at-a-time .. option:: -funroll-loops, -fno-unroll-loops Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -1588,6 +1588,12 @@ def frandom_seed_EQ : Joined<["-"], "frandom-seed=">, Group; def freg_struct_return : Flag<["-"], "freg-struct-return">, Group, Flags<[CC1Option]>, HelpText<"Override the default ABI to return small structs in registers">; +def freorder_functions : Flag <["-"], "freorder-functions">, + Group, Flags<[CC1Option]>, + HelpText<"Using special subsections for hot/cold functions to improve code " + "locality">; +def fno_reorder_functions : Flag <["-"], "fno-reorder-functions">, + Group, Flags<[CC1Option]>; def frtti : Flag<["-"], "frtti">, Group; def : Flag<["-"], "fsched-interblock">, Group; def fshort_enums : Flag<["-"], "fshort-enums">, Group, Flags<[CC1Option]>, Index: include/clang/Frontend/CodeGenOptions.def =================================================================== --- include/clang/Frontend/CodeGenOptions.def +++ include/clang/Frontend/CodeGenOptions.def @@ -220,6 +220,7 @@ CODEGENOPT(TimePasses , 1, 0) ///< Set when -ftime-report is enabled. CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled. CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled. +CODEGENOPT(ReorderFunctions , 1, 1) ///< Set when -freorder-functions is enabled. CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled. CODEGENOPT(UnsafeFPMath , 1, 0) ///< Allow unsafe floating point optzns. CODEGENOPT(UnwindTables , 1, 0) ///< Emit unwind tables. Index: lib/CodeGen/BackendUtil.cpp =================================================================== --- lib/CodeGen/BackendUtil.cpp +++ lib/CodeGen/BackendUtil.cpp @@ -463,6 +463,7 @@ Options.FunctionSections = CodeGenOpts.FunctionSections; Options.DataSections = CodeGenOpts.DataSections; Options.UniqueSectionNames = CodeGenOpts.UniqueSectionNames; + Options.ReorderFunctions = CodeGenOpts.ReorderFunctions; Options.EmulatedTLS = CodeGenOpts.EmulatedTLS; Options.ExplicitEmulatedTLS = CodeGenOpts.ExplicitEmulatedTLS; Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning(); Index: lib/Driver/ToolChains/Clang.cpp =================================================================== --- lib/Driver/ToolChains/Clang.cpp +++ lib/Driver/ToolChains/Clang.cpp @@ -3891,6 +3891,10 @@ options::OPT_fno_unique_section_names, true)) CmdArgs.push_back("-fno-unique-section-names"); + if (!Args.hasFlag(options::OPT_freorder_functions, + options::OPT_fno_reorder_functions, true)) + CmdArgs.push_back("-fno-reorder-functions"); + if (auto *A = Args.getLastArg( options::OPT_finstrument_functions, options::OPT_finstrument_functions_after_inlining, @@ -3929,6 +3933,82 @@ Args.AddLastArg(CmdArgs, options::OPT_working_directory); + bool ARCMTEnabled = false; + if (!Args.hasArg(options::OPT_fno_objc_arc, options::OPT_fobjc_arc)) { + if (const Arg *A = Args.getLastArg(options::OPT_ccc_arcmt_check, + options::OPT_ccc_arcmt_modify, + options::OPT_ccc_arcmt_migrate)) { + ARCMTEnabled = true; + switch (A->getOption().getID()) { + default: + llvm_unreachable("missed a case"); + case options::OPT_ccc_arcmt_check: + CmdArgs.push_back("-arcmt-check"); + break; + case options::OPT_ccc_arcmt_modify: + CmdArgs.push_back("-arcmt-modify"); + break; + case options::OPT_ccc_arcmt_migrate: + CmdArgs.push_back("-arcmt-migrate"); + CmdArgs.push_back("-mt-migrate-directory"); + CmdArgs.push_back(A->getValue()); + + Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_report_output); + Args.AddLastArg(CmdArgs, options::OPT_arcmt_migrate_emit_arc_errors); + break; + } + } + } else { + Args.ClaimAllArgs(options::OPT_ccc_arcmt_check); + Args.ClaimAllArgs(options::OPT_ccc_arcmt_modify); + Args.ClaimAllArgs(options::OPT_ccc_arcmt_migrate); + } + + if (const Arg *A = Args.getLastArg(options::OPT_ccc_objcmt_migrate)) { + if (ARCMTEnabled) { + D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) + << "-ccc-arcmt-migrate"; + } + CmdArgs.push_back("-mt-migrate-directory"); + CmdArgs.push_back(A->getValue()); + + if (!Args.hasArg(options::OPT_objcmt_migrate_literals, + options::OPT_objcmt_migrate_subscripting, + options::OPT_objcmt_migrate_property)) { + // None specified, means enable them all. + CmdArgs.push_back("-objcmt-migrate-literals"); + CmdArgs.push_back("-objcmt-migrate-subscripting"); + CmdArgs.push_back("-objcmt-migrate-property"); + } else { + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); + } + } else { + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_literals); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_subscripting); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_all); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readonly_property); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_readwrite_property); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_property_dot_syntax); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_annotation); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_instancetype); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_nsmacros); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_protocol_conformance); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_atomic_property); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_whitelist_dir_path); + } + + // Pass the path to compiler resource files. + CmdArgs.push_back("-resource-dir"); + CmdArgs.push_back(D.ResourceDir.c_str()); + + Args.AddLastArg(CmdArgs, options::OPT_working_directory); + RenderARCMigrateToolOptions(D, Args, CmdArgs); // Add preprocessing options like -I, -D, etc. if we are using the Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -756,6 +756,8 @@ Args.hasFlag(OPT_fstack_size_section, OPT_fno_stack_size_section, false); Opts.UniqueSectionNames = Args.hasFlag(OPT_funique_section_names, OPT_fno_unique_section_names, true); + Opts.ReorderFunctions = + Args.hasFlag(OPT_freorder_functions, OPT_fno_reorder_functions, true); Opts.MergeFunctions = Args.hasArg(OPT_fmerge_functions); Index: test/CodeGen/Inputs/freorder-functions.prof =================================================================== --- /dev/null +++ test/CodeGen/Inputs/freorder-functions.prof @@ -0,0 +1,5 @@ +hot_func:1000:0 + 1: 0 +cold_func:0:0 + 1: 1 + 2: 1 Index: test/CodeGen/freorder-functions.c =================================================================== --- /dev/null +++ test/CodeGen/freorder-functions.c @@ -0,0 +1,22 @@ +// REQUIRES: x86-registered-target + +// RUN: %clang_cc1 -triple x86_64-pc-linux -S -O3 -ffunction-sections -fprofile-sample-use=%S/Inputs/freorder-functions.prof -o - < %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-linux -S -O3 -ffunction-sections -fprofile-sample-use=%S/Inputs/freorder-functions.prof -fno-reorder-functions -o - < %s | FileCheck --check-prefix=CHECK-NOPREFIX %s + +// opt tool option precedes driver option. +// RUN: %clang_cc1 -triple x86_64-pc-linux -S -O3 -ffunction-sections -fprofile-sample-use=%S/Inputs/freorder-functions.prof -fno-reorder-functions -mllvm -profile-guided-section-prefix=true -o - < %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-linux -S -O3 -ffunction-sections -fprofile-sample-use=%S/Inputs/freorder-functions.prof -freorder-functions -mllvm -profile-guided-section-prefix=false -o - < %s | FileCheck --check-prefix=CHECK-NOPREFIX %s + +void hot_func() { + return; +} + +void cold_func() { + hot_func(); + return; +} + +// CHECK: .section .text.hot.hot_func,"ax",@progbits +// CHECK: .section .text.unlikely.cold_func,"ax",@progbits +// CHECK-NOPREFIX: .section .text.hot_func,"ax",@progbits +// CHECK-NOPREFIX: .section .text.cold_func,"ax",@progbits Index: test/Driver/function-sections.c =================================================================== --- test/Driver/function-sections.c +++ test/Driver/function-sections.c @@ -6,6 +6,8 @@ // CHECK-NODS-NOT: -fdata-sections // CHECK-US-NOT: -fno-unique-section-names // CHECK-NOUS: -fno-unique-section-names +// CHECK-RF-NOT: -fno-reorder-functions +// CHECK-NORF: -fno-reorder-functions // RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \ // RUN: -target i386-unknown-linux \ @@ -72,3 +74,13 @@ // RUN: -target i386-unknown-linux \ // RUN: -fno-unique-section-names \ // RUN: | FileCheck --check-prefix=CHECK-NOUS %s + +// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: -freorder-functions \ +// RUN: | FileCheck --check-prefix=CHECK-RF %s + +// RUN: %clang -no-canonical-prefixes %s -### -fsyntax-only 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: -fno-reorder-functions \ +// RUN: | FileCheck --check-prefix=CHECK-NORF %s