Index: lib/Target/AMDGPU/AMDGPU.h
===================================================================
--- lib/Target/AMDGPU/AMDGPU.h
+++ lib/Target/AMDGPU/AMDGPU.h
@@ -90,6 +90,11 @@
 ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
 FunctionPass *createAMDGPUAnnotateUniformValues();
 
+/// Creates the pass that unifies OpenCL metadata duplicated by linking.
+FunctionPass *createAMDGPUUnifyMetadataPass();
+void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
+extern char &AMDGPUUnifyMetadataID;
+
 void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
 extern char &SIFixControlFlowLiveIntervalsID;
 
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -50,6 +50,8 @@
   TargetLoweringObjectFile *getObjFileLowering() const override {
     return TLOF.get();
   }
+  /// Adds the AMDGPU metadata unification pass to \p PM.
+  void addEarlyAsPossiblePasses(PassManagerBase &PM) override;
 };
 
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Vectorize.h"
+#include "llvm/IR/LegacyPassManager.h"
 
 using namespace llvm;
 
@@ -77,6 +78,7 @@
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
   initializeAMDGPUPromoteAllocaPass(*PR);
   initializeAMDGPUCodeGenPreparePass(*PR);
+  initializeAMDGPUUnifyMetadataPass(*PR);
   initializeSIAnnotateControlFlowPass(*PR);
   initializeSIInsertWaitsPass(*PR);
   initializeSIWholeQuadModePass(*PR);
@@ -181,6 +183,11 @@
     FSAttr.getValueAsString();
 }
 
+/// Schedules the OpenCL metadata unification pass as early as possible.
+void AMDGPUTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
+  PM.add(createAMDGPUUnifyMetadataPass());
+}
+
 //===----------------------------------------------------------------------===//
 // R600 Target Machine (R600 ->
Cayman)
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
@@ -0,0 +1,194 @@
+//===-- AMDGPUUnifyMetadata.cpp - Unify OpenCL metadata -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// \brief This pass unifies OpenCL metadata that is duplicated when several
+// modules are linked together.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <string>
+
+using namespace llvm;
+
+namespace {
+
+namespace kOCLMD {
+const char SpirVer[] = "opencl.spir.version";
+const char OCLVer[] = "opencl.ocl.version";
+const char UsedExt[] = "opencl.used.extensions";
+const char UsedOptCoreFeat[] = "opencl.used.optional.core.features";
+const char CompilerOptions[] = "opencl.compiler.options";
+const char LLVMIdent[] = "llvm.ident";
+} // end namespace kOCLMD
+
+/// \brief Wrap a string in a metadata node.
+/// \return a node whose sole operand is \p Str, or a node without operands
+/// if \p Str is empty.
+MDNode *getMDString(LLVMContext *Context, StringRef Str) {
+  SmallVector<Metadata *, 4> ValueVec;
+  if (!Str.empty())
+    ValueVec.push_back(MDString::get(*Context, Str));
+  return MDNode::get(*Context, ValueVec);
+}
+
+/// \brief Unify multiple OpenCL metadata due to linking.
+class AMDGPUUnifyMetadata : public FunctionPass {
+public:
+  static char ID;
+  explicit AMDGPUUnifyMetadata() : FunctionPass(ID) {}
+
+private:
+  // This should really be a module pass, but it has to run as early as
+  // possible. Function passes are executed first and
+  // TargetMachine::addEarlyAsPossiblePasses() expects only function passes,
+  // so it has to be a function pass.
+  bool runOnModule(Module &M);
+
+  // \todo: Convert to a module pass.
+  bool runOnFunction(Function &F) override;
+
+  /// \brief Unify version metadata.
+  /// \return true if changes are made.
+  /// Assume the named metadata has operands each of which is a pair of
+  /// integer constants, e.g.
+  /// !Name = {!n1, !n2}
+  /// !n1 = {i32 1, i32 2}
+  /// !n2 = {i32 2, i32 0}
+  /// Keep the largest version as the sole operand if PickFirst is false.
+  /// Otherwise pick it from the first value, representing kernel module.
+  bool unifyVersionMD(Module &M, StringRef Name, bool PickFirst) {
+    NamedMDNode *NamedMD = M.getNamedMetadata(Name);
+    if (!NamedMD || NamedMD->getNumOperands() <= 1)
+      return false;
+
+    // With PickFirst only the first operand is a candidate.
+    const unsigned E = PickFirst ? 1 : NamedMD->getNumOperands();
+
+    MDNode *MaxMD = nullptr;
+    uint64_t MaxMajor = 0;
+    uint64_t MaxMinor = 0;
+    for (unsigned I = 0; I != E; ++I) {
+      MDNode *VersionMD = NamedMD->getOperand(I);
+      assert(VersionMD->getNumOperands() == 2 &&
+             "version metadata must be a {major, minor} pair");
+      auto *CMajor = mdconst::extract<ConstantInt>(VersionMD->getOperand(0));
+      auto *CMinor = mdconst::extract<ConstantInt>(VersionMD->getOperand(1));
+      const uint64_t Major = CMajor->getZExtValue();
+      const uint64_t Minor = CMinor->getZExtValue();
+      // Compare (major, minor) lexicographically. The first candidate is
+      // always accepted, so a 0.0 version cannot leave MaxMD null.
+      if (!MaxMD || Major > MaxMajor ||
+          (Major == MaxMajor && Minor > MaxMinor)) {
+        MaxMajor = Major;
+        MaxMinor = Minor;
+        MaxMD = VersionMD;
+      }
+    }
+
+    NamedMD->eraseFromParent();
+    NamedMD = M.getOrInsertNamedMetadata(Name);
+    NamedMD->addOperand(MaxMD);
+    return true;
+  }
+
+  /// \brief Unify extension metadata.
+  /// \return true if changes are made.
+  /// Assume the named metadata has operands each of which is a string e.g.
+  /// !Name = {!n1, !n2}
+  /// !n1 = !"cl_khr_fp16"
+  /// !n2 = !"cl_khr_image"
+  /// Concatenate the distinct strings, separated by spaces, as the sole
+  /// operand.
+  bool unifyExtensionMD(Module &M, StringRef Name) {
+    NamedMDNode *NamedMD = M.getNamedMetadata(Name);
+    if (!NamedMD || NamedMD->getNumOperands() <= 1)
+      return false;
+
+    SmallVector<StringRef, 4> Seen;
+    std::string All;
+    for (unsigned I = 0, E = NamedMD->getNumOperands(); I != E; ++I) {
+      MDNode *MD = NamedMD->getOperand(I);
+      if (MD->getNumOperands() == 0)
+        continue;
+      // Skip null or non-string operands instead of dereferencing them.
+      auto *MDS = dyn_cast_or_null<MDString>(MD->getOperand(0));
+      if (!MDS)
+        continue;
+      const StringRef Str = MDS->getString();
+      if (Str.empty())
+        continue;
+      // Do not produce duplicated strings. Whole strings are compared, so
+      // "cl_khr_fp16" is not dropped merely because a longer name that
+      // contains it has already been added.
+      if (std::find(Seen.begin(), Seen.end(), Str) != Seen.end())
+        continue;
+      if (!Seen.empty())
+        All += ' ';
+      Seen.push_back(Str);
+      All += Str.str();
+    }
+
+    NamedMD->eraseFromParent();
+    NamedMD = M.getOrInsertNamedMetadata(Name);
+    NamedMD->addOperand(getMDString(&M.getContext(), All));
+    return true;
+  }
+};
+
+} // end anonymous namespace
+
+char AMDGPUUnifyMetadata::ID = 0;
+
+char &llvm::AMDGPUUnifyMetadataID = AMDGPUUnifyMetadata::ID;
+
+INITIALIZE_PASS(AMDGPUUnifyMetadata, "amdgpu-unify-metadata",
+                "Unify multiple OpenCL metadata due to linking",
+                false, false)
+
+FunctionPass *llvm::createAMDGPUUnifyMetadataPass() {
+  return new AMDGPUUnifyMetadata();
+}
+
+bool AMDGPUUnifyMetadata::runOnModule(Module &M) {
+  const char *Vers[] = {
+    kOCLMD::SpirVer,
+    kOCLMD::OCLVer
+  };
+  const char *Exts[] = {
+    kOCLMD::UsedExt,
+    kOCLMD::UsedOptCoreFeat,
+    kOCLMD::CompilerOptions,
+    kOCLMD::LLVMIdent
+  };
+
+  bool Changed = false;
+
+  for (auto &I : Vers)
+    Changed |= unifyVersionMD(M, I, true);
+
+  for (auto &I : Exts)
+    Changed |= unifyExtensionMD(M, I);
+
+  return Changed;
+}
+
+bool AMDGPUUnifyMetadata::runOnFunction(Function &F) {
+  return runOnModule(*F.getParent());
+}
Index: lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/CMakeLists.txt
+++ lib/Target/AMDGPU/CMakeLists.txt
@@ -41,6 +41,7 @@
   AMDGPUISelDAGToDAG.cpp
   AMDGPUMCInstLower.cpp
   AMDGPUMachineFunction.cpp
+  AMDGPUUnifyMetadata.cpp
   AMDGPUOpenCLImageTypeLoweringPass.cpp
   AMDGPUSubtarget.cpp
AMDGPUTargetMachine.cpp Index: test/CodeGen/AMDGPU/unify-metadata.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/unify-metadata.ll @@ -0,0 +1,20 @@ +; RUN: opt -mtriple=amdgcn--amdhsa -amdgpu-unify-metadata -S < %s | FileCheck -check-prefix=ALL %s + +; This test checks that we have a single metadata value after linking several +; modules for records such as opencl.ocl.version, llvm.ident and similar. + +; ALL: !opencl.ocl.version = !{!0} +; ALL: !llvm.ident = !{!1} +; ALL: !0 = !{i32 1, i32 2} +; ALL: !1 = !{!"clang version 4.0 "} + +define void @test() { + ret void +} + +!opencl.ocl.version = !{!1, !0, !0, !0} +!llvm.ident = !{!2, !2, !2, !2} + +!0 = !{i32 2, i32 0} +!1 = !{i32 1, i32 2} +!2 = !{!"clang version 4.0 "}