Index: llvm/include/llvm/MC/MCContext.h
===================================================================
--- llvm/include/llvm/MC/MCContext.h
+++ llvm/include/llvm/MC/MCContext.h
@@ -642,6 +642,12 @@
                                 unsigned Flags, const MCSymbolWasm *Group,
                                 unsigned UniqueID, const char *BeginSymName);
 
+  /// Return true if an XCOFF csect named \p Section with the storage mapping
+  /// class of \p CsectProp is already present in the uniquing map. Only the
+  /// mapping class takes part in the lookup; the symbol type is ignored.
+  bool hasXCOFFSection(StringRef Section,
+                       XCOFF::CsectProperties CsectProp) const;
+
   MCSectionXCOFF *getXCOFFSection(
       StringRef Section, SectionKind K,
       Optional<XCOFF::CsectProperties> CsectProp = None,
Index: llvm/include/llvm/MC/MCStreamer.h
===================================================================
--- llvm/include/llvm/MC/MCStreamer.h
+++ llvm/include/llvm/MC/MCStreamer.h
@@ -615,6 +615,12 @@
   /// changed at the end of assembly.
   virtual void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename);
 
+  /// Emit an XCOFF .ref directive which creates an R_REF type entry in the
+  /// relocation table for one or more symbols.
+  ///
+  /// \param Sym - The symbol on the .ref directive.
+  virtual void emitXCOFFRefDirective(StringRef Sym);
+
   /// Emit an ELF .size directive.
   ///
   /// This corresponds to an assembler statement such as:
Index: llvm/include/llvm/MC/MCXCOFFStreamer.h
===================================================================
--- llvm/include/llvm/MC/MCXCOFFStreamer.h
+++ llvm/include/llvm/MC/MCXCOFFStreamer.h
@@ -32,6 +32,10 @@
   void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol,
                                             MCSymbolAttr Linkage,
                                             MCSymbolAttr Visibility) override;
+  void emitXCOFFRefDirective(StringRef Name) override {
+    report_fatal_error("emitXCOFFRefDirective is not implemented yet on object "
+                       "generation path");
+  }
   void emitXCOFFRenameDirective(const MCSymbol *Name,
                                 StringRef Rename) override {
     report_fatal_error("emitXCOFFRenameDirective is not implemented yet on "
Index: llvm/lib/MC/MCAsmStreamer.cpp
===================================================================
--- llvm/lib/MC/MCAsmStreamer.cpp
+++ llvm/lib/MC/MCAsmStreamer.cpp
@@ -198,6 +198,8 @@
   void emitXCOFFRenameDirective(const MCSymbol *Name,
                                 StringRef Rename) override;
 
+  void emitXCOFFRefDirective(StringRef Name) override;
+
   void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
   void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                         unsigned ByteAlignment) override;
@@ -929,6 +931,11 @@
   EmitEOL();
 }
 
+void MCAsmStreamer::emitXCOFFRefDirective(StringRef Name) {
+  OS << "\t.ref " << Name;
+  EmitEOL();
+}
+
 void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
   assert(MAI->hasDotTypeDotSizeDirective());
   OS << "\t.size\t";
Index: llvm/lib/MC/MCContext.cpp
===================================================================
--- llvm/lib/MC/MCContext.cpp
+++ llvm/lib/MC/MCContext.cpp
@@ -732,6 +732,12 @@
   return Result;
 }
 
+bool MCContext::hasXCOFFSection(StringRef Section,
+                                XCOFF::CsectProperties CsectProp) const {
+  return XCOFFUniquingMap.count(
+             XCOFFSectionKey(Section.str(), CsectProp.MappingClass)) != 0;
+}
+
 MCSectionXCOFF *MCContext::getXCOFFSection(
     StringRef Section, SectionKind Kind,
     Optional<XCOFF::CsectProperties> CsectProp, bool MultiSymbolsAllowed,
Index: llvm/lib/MC/MCStreamer.cpp
===================================================================
--- llvm/lib/MC/MCStreamer.cpp
+++ llvm/lib/MC/MCStreamer.cpp
@@ -1180,6 +1180,10 @@
                    "XCOFF targets");
 }
 
+void MCStreamer::emitXCOFFRefDirective(StringRef Name) {
+  llvm_unreachable("emitXCOFFRefDirective is only supported on XCOFF targets");
+}
+
 void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
 void MCStreamer::emitELFSymverDirective(const MCSymbol *OriginalSym,
                                         StringRef Name, bool KeepOriginalSym) {}
Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -235,6 +235,8 @@
 
   void emitFunctionBodyEnd() override;
 
+  void emitPGORefs();
+
   void emitEndOfAsmFile(Module &) override;
 
   void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const override;
@@ -2432,12 +2434,43 @@
   });
 }
 
+/// Emit .ref directives tying the PGO csects to the counters csect: if a
+/// __llvm_prf_cnts[RW] csect exists, switch to it and emit a .ref for each of
+/// __llvm_prf_data[RW], __llvm_prf_names[RO] and __llvm_prf_vnds[RW] that also
+/// exists in the context.
+void PPCAIXAsmPrinter::emitPGORefs() {
+  if (OutContext.hasXCOFFSection(
+          "__llvm_prf_cnts",
+          XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) {
+    MCSection *CntsSection = OutContext.getXCOFFSection(
+        "__llvm_prf_cnts", SectionKind::getData(),
+        XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD),
+        /*MultiSymbolsAllowed*/ true);
+
+    OutStreamer->SwitchSection(CntsSection);
+    if (OutContext.hasXCOFFSection(
+            "__llvm_prf_data",
+            XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
+      OutStreamer->emitXCOFFRefDirective("__llvm_prf_data[RW]");
+    if (OutContext.hasXCOFFSection(
+            "__llvm_prf_names",
+            XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD)))
+      OutStreamer->emitXCOFFRefDirective("__llvm_prf_names[RO]");
+    if (OutContext.hasXCOFFSection(
+            "__llvm_prf_vnds",
+            XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
+      OutStreamer->emitXCOFFRefDirective("__llvm_prf_vnds[RW]");
+  }
+}
+
 void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) {
   // If there are no functions and there are no toc-data definitions in this
   // module, we will never need to reference the TOC base.
   if (M.empty() && TOCDataGlobalVars.empty())
     return;
 
+  emitPGORefs();
+
   // Switch to section to emit TOC base.
   OutStreamer->SwitchSection(getObjFileLowering().getTOCBaseSection());
 
Index: llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll
@@ -0,0 +1,82 @@
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/no-ref.ll | FileCheck %s --check-prefixes=NOREF
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/no-vnds.ll | FileCheck %s --check-prefixes=NOVNDS
+; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/with-vnds.ll | FileCheck %s --check-prefixes=WITHVNDS
+
+
+;--- no-ref.ll
+; The absence of a __llvm_prf_cnts section should stop generating the .refs.
+;
+target datalayout = "E-m:a-p:32:32-i64:64-n32"
+target triple = "powerpc-ibm-aix7.2.0.0"
+
+@__profd_main = private global i64 zeroinitializer, section "__llvm_prf_data", align 8
+@__llvm_prf_nm = private constant [6 x i8] c"\04\00main", section "__llvm_prf_names", align 1
+
+@llvm.used = appending global [2 x i8*]
+  [i8* bitcast (i64* @__profd_main to i8*),
+   i8* getelementptr inbounds ([6 x i8], [6 x i8]* @__llvm_prf_nm, i32 0, i32 0)], section "llvm.metadata"
+
+define i32 @main() #0 {
+entry:
+  ret i32 1
+}
+
+; NOREF-NOT: .ref __llvm_prf_data
+; NOREF-NOT: .ref __llvm_prf_names
+; NOREF-NOT: .ref __llvm_prf_vnds
+
+;--- no-vnds.ll
+; This is the most common case. When -fprofile-generate is used and there exists executable code, we generate the __llvm_prf_cnts, __llvm_prf_data, and __llvm_prf_names sections.
+;
+target datalayout = "E-m:a-p:32:32-i64:64-n32"
+target triple = "powerpc-ibm-aix7.2.0.0"
+
+@__profc_main = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
+@__profd_main = private global i64 zeroinitializer, section "__llvm_prf_data", align 8
+@__llvm_prf_nm = private constant [6 x i8] c"\04\00main", section "__llvm_prf_names", align 1
+
+@llvm.used = appending global [3 x i8*]
+  [i8* bitcast ([1 x i64]* @__profc_main to i8*),
+   i8* bitcast (i64* @__profd_main to i8*),
+   i8* getelementptr inbounds ([6 x i8], [6 x i8]* @__llvm_prf_nm, i32 0, i32 0)], section "llvm.metadata"
+
+define i32 @main() #0 {
+entry:
+  ret i32 1
+}
+; There will be two __llvm_prf_cnts .csects, one to represent the actual csect
+; that holds @__profc_main, and one generated to hold the .ref directives. In
+; XCOFF, a csect can be defined in pieces, so this is legal assembly.
+;
+; NOVNDS: .csect __llvm_prf_cnts[RW],3
+; NOVNDS: .csect __llvm_prf_cnts[RW],3
+; NOVNDS-NEXT: .ref __llvm_prf_data[RW]
+; NOVNDS-NEXT: .ref __llvm_prf_names[RO]
+; NOVNDS-NOT: .ref __llvm_prf_vnds
+
+;--- with-vnds.ll
+; When value profiling is needed, the PGO instrumentation generates variables in the __llvm_prf_vnds section, so we generate a .ref for them too.
+;
+@__profc_main = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
+@__profd_main = private global i64 zeroinitializer, section "__llvm_prf_data", align 8
+@__llvm_prf_nm = private constant [6 x i8] c"\04\00main", section "__llvm_prf_names", align 1
+@__llvm_prf_vnodes = private global [10 x { i64, i64, i8* }] zeroinitializer, section "__llvm_prf_vnds"
+
+@llvm.used = appending global [4 x i8*]
+  [i8* bitcast ([1 x i64]* @__profc_main to i8*),
+   i8* bitcast (i64* @__profd_main to i8*),
+   i8* getelementptr inbounds ([6 x i8], [6 x i8]* @__llvm_prf_nm, i32 0, i32 0),
+   i8* bitcast ([10 x { i64, i64, i8* }]* @__llvm_prf_vnodes to i8*)], section "llvm.metadata"
+
+define i32 @main() #0 {
+entry:
+  ret i32 1
+}
+
+; WITHVNDS: .csect __llvm_prf_cnts[RW],3
+; WITHVNDS: .csect __llvm_prf_cnts[RW],3
+; WITHVNDS-NEXT: .ref __llvm_prf_data[RW]
+; WITHVNDS-NEXT: .ref __llvm_prf_names[RO]
+; WITHVNDS-NEXT: .ref __llvm_prf_vnds[RW]