Index: lib/Target/HSAIL/AMDOpenCLKernenv.h
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/AMDOpenCLKernenv.h
@@ -0,0 +1,69 @@
+//===-- AMDOpenCLKernenv.h - HSAIL kernenv for OpenCL -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// \brief Declare OpenCL dispatch-specific constants that are passed
+// as additional arguments (the "kernenv") to the HSAIL kernel.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __AMD_OPENCL_KERNENV_H__
+#define __AMD_OPENCL_KERNENV_H__
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+class LLVMContext;
+
+namespace HSAIL {
+enum {
+  KE_GOFFSET_0,
+  KE_GOFFSET_1,
+  KE_GOFFSET_2,
+  KE_OCL12_NUM_ARGS,
+  KE_PRINTF_BFR = KE_OCL12_NUM_ARGS,
+  KE_VQ_PTR,
+  KE_AQLWRAP_PTR,
+  KE_OCL20_NUM_ARGS,
+  KE_NUM_ARGS = KE_OCL20_NUM_ARGS // Always the last member.
+};
+}
+
+static inline StringRef getKernenvArgName(unsigned ID) {
+  switch (ID) {
+  case HSAIL::KE_GOFFSET_0:
+    return "__global_offset_0";
+  case HSAIL::KE_GOFFSET_1:
+    return "__global_offset_1";
+  case HSAIL::KE_GOFFSET_2:
+    return "__global_offset_2";
+  case HSAIL::KE_PRINTF_BFR:
+    return "__printf_buffer";
+  case HSAIL::KE_VQ_PTR:
+    return "__vqueue_pointer";
+  case HSAIL::KE_AQLWRAP_PTR:
+    return "__aqlwrap_pointer";
+  default:
+    llvm_unreachable("unexpected Kernenv argument ID");
+  }
+}
+
+static inline Type *getKernenvArgType(unsigned ID, LLVMContext &C,
+                                      bool is64bit) {
+  if (ID == HSAIL::KE_PRINTF_BFR)
+    return Type::getInt8PtrTy(C, 1);
+  return (is64bit ? Type::getInt64Ty(C) : Type::getInt32Ty(C));
+}
+
+static inline StringRef getKernenvArgTypeName(unsigned ID) { return "size_t"; }
+}
+
+#endif // __AMD_OPENCL_KERNENV_H__
Index: lib/Target/HSAIL/BRIGAsmPrinter/BRIGAsmPrinter.h
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/BRIGAsmPrinter/BRIGAsmPrinter.h
@@ -0,0 +1,235 @@
+//===- BRIGAsmPrinter.h - Convert HSAIL LLVM code to assembly ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _BRIG_ASM_PRINTER_H_
+#define _BRIG_ASM_PRINTER_H_
+
+#include "HSAIL.h"
+#include "HSAILAsmPrinter.h"
+#include "HSAILBrig.h"
+#include "HSAILSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+
+#include "MCTargetDesc/RawVectorOstream.h"
+
+#include "libHSAIL/HSAILBrigantine.h"
+
+namespace llvm {
+
+class Argument;
+class HSAILKernelManager;
+class HSAILMachineFunctionInfo;
+class MachineBasicBlock;
+class MachineConstantPoolValue;
+class MachineFunction;
+class MachineInstr;
+class MachineJumpTableInfo;
+class MCContext;
+class MCInst;
+class MCStreamer;
+class MCSymbol;
+class Module;
+class raw_ostream;
+
+class LLVM_LIBRARY_VISIBILITY BRIGAsmPrinter : public HSAILAsmPrinter {
+  friend class StoreInitializer;
+
+  const HSAILSubtarget *Subtarget;
+
+  // Used for the callee part of the calling convention; cleared for every
+  // function.
+  std::string FuncArgsStr;
+  std::string FuncRetValStr;
+
+  // Counters for the return values and parameters of a function; cleared for
+  // every function.
+  unsigned retValCounter;
+  unsigned paramCounter;
+
+public:
+  explicit BRIGAsmPrinter(TargetMachine &TM,
+                          std::unique_ptr<MCStreamer> Streamer);
+
+  virtual ~BRIGAsmPrinter();
+
+  const char *getPassName() const override { return "BRIG Container Filler"; }
+
+  void EmitGlobalVariable(const GlobalVariable *GV) override;
+
+  void EmitFunctionLabel(const Function &F, const GlobalAlias *GA = nullptr);
+
+  void EmitStartOfAsmFile(Module &) override;
+
+  void EmitEndOfAsmFile(Module &) override;
+
+  void EmitFunctionBodyStart() override;
+
+  void EmitFunctionBodyEnd() override;
+
+  void EmitInstruction(const MachineInstr *) override;
+
+  void EmitFunctionEntryLabel() override;
+
+  bool doFinalization(Module &M) override;
+
+public:
+  bool runOnMachineFunction(MachineFunction &F) override;
+  void EmitSamplerDefs();
+
+  // Vector that keeps offsets and sizes (in bits) of all the BRIG variables
+  // generated for some vector argument.
+  typedef SmallVector<std::pair<uint64_t, uint64_t>, 16> VectorArgumentOffsets;
+
+  bool getGlobalVariableOffset(const GlobalVariable *GV,
+                               uint64_t *result) const;
+  bool getFunctionScalarArgumentOffset(const std::string &argName,
+                                       uint64_t *result) const;
+  bool getFunctionVectorArgumentOffsets(const std::string &argName,
+                                        VectorArgumentOffsets &result) const;
+  bool getGroupVariableOffset(const GlobalVariable *GV, uint64_t *result) const;
+
+protected:
+  virtual void emitMacroFunc(const MachineInstr *MI, raw_ostream &O);
+
+  HSAILTargetMachine *mTM;
+  const HSAILInstrInfo *TII;
+  HSAILKernelManager *mMeta;      // Metadata required by the runtime.
+  HSAILMachineFunctionInfo *mMFI; // Current function being processed.
+
+  /// Name of the kernel wrapper of the current function.
+  std::string mKernelName;
+  bool m_bIsKernel; // True if the current function being processed is an
+                    // OpenCL kernel.
+
+  HSAIL_ASM::BrigContainer bc;
+  mutable HSAIL_ASM::Brigantine brigantine;
+
+  static char getSymbolPrefixForAddressSpace(unsigned AS);
+  char getSymbolPrefix(const MCSymbol &S) const;
+
+  void BrigEmitInitVarWithAddressPragma(StringRef VarName, uint64_t BaseOffset,
+                                        const MCExpr *E, unsigned EltSize);
+  void BrigEmitGlobalInit(HSAIL_ASM::DirectiveVariable, Type *EltTy,
+                          Constant *);
+  void BrigEmitOperand(const MachineInstr *MI, unsigned opNum,
+                       HSAIL_ASM::Inst inst);
+  void BrigEmitOperandLdStAddress(const MachineInstr *MI, unsigned opNum,
+                                  unsigned Segment);
+  void BrigEmitVecArgDeclaration(const MachineInstr *MI);
+  void BrigEmitOperandImage(const MachineInstr *MI, unsigned opNum);
+  void BrigEmitImageInst(const MachineInstr *MI, HSAIL_ASM::InstImage inst);
+
+  HSAIL_ASM::InstBasic BrigEmitInstBasic(const MachineInstr &MI,
+                                         unsigned BrigOpc);
+  HSAIL_ASM::InstMod BrigEmitInstMod(const MachineInstr &MI, unsigned BrigOpc);
+  HSAIL_ASM::InstCmp BrigEmitInstCmp(const MachineInstr &MI, unsigned BrigOpc);
+  HSAIL_ASM::InstCvt BrigEmitInstCvt(const MachineInstr &MI, unsigned BrigOpc);
+  HSAIL_ASM::InstSourceType BrigEmitInstSourceType(const MachineInstr &MI,
+                                                   unsigned BrigOpc);
+
+  HSAIL_ASM::InstLane BrigEmitInstLane(const MachineInstr &MI,
+                                       unsigned BrigOpc);
+  HSAIL_ASM::InstBr BrigEmitInstBr(const MachineInstr &MI, unsigned BrigOpc);
+  HSAIL_ASM::InstSeg BrigEmitInstSeg(const MachineInstr &MI, unsigned BrigOpc);
+  HSAIL_ASM::InstSegCvt BrigEmitInstSegCvt(const MachineInstr &MI,
+                                           unsigned BrigOpc);
+  HSAIL_ASM::InstMemFence BrigEmitInstMemFence(const MachineInstr &MI,
+                                               unsigned BrigOpc);
+  HSAIL_ASM::InstMem BrigEmitInstMem(const MachineInstr &MI, unsigned BrigOpc);
+  HSAIL_ASM::InstAtomic BrigEmitInstAtomic(const MachineInstr &MI,
+                                           unsigned BrigOpc);
+  HSAIL_ASM::InstImage BrigEmitInstImage(const MachineInstr &MI,
+                                         unsigned BrigOpc);
+  HSAIL_ASM::InstAddr BrigEmitInstAddr(const MachineInstr &MI,
+                                       unsigned BrigOpc);
+
+  void BrigEmitVecOperand(const MachineInstr *MI, unsigned opStart,
+                          unsigned numRegs, HSAIL_ASM::Inst inst);
+
+  // Stream that captures DWARF data to the internal buffer.
+  RawVectorOstream *mDwarfStream;
+
+  // Stream that will receive all BRIG data.
+  raw_ostream *mBrigStream;
+
+  // Stream that will receive all captured DWARF data in the case of -odebug.
+  raw_fd_ostream *mDwarfFileStream;
+
+  // Table that stores offsets of all emitted global variables - used in DWARF.
+  std::map<const GlobalVariable *, uint64_t> globalVariableOffsets;
+  typedef std::map<const GlobalVariable *, uint64_t>::const_iterator
+      gvo_iterator;
+
+  // Table that stores offsets of scalar arguments of the function being
+  // emitted - used in DWARF.
+  typedef std::map<std::string, uint64_t> ScalarArgumentOffsetsMap;
+  typedef ScalarArgumentOffsetsMap::const_iterator fao_iterator;
+  ScalarArgumentOffsetsMap functionScalarArgumentOffsets;
+
+  // Table that stores offsets of BRIG variables generated for vector arguments
+  // - used in DWARF.
+  typedef std::map<std::string, VectorArgumentOffsets> VectorArgumentOffsetsMap;
+  typedef VectorArgumentOffsetsMap::const_iterator fvo_iterator;
+  VectorArgumentOffsetsMap functionVectorArgumentOffsets;
+
+  // Tables that store offsets of private and group variables - used in both
+  // DWARF and EmitFunctionBodyStart.
+  typedef DenseMap<const GlobalVariable *, uint64_t> PVGVOffsetMap;
+  PVGVOffsetMap groupVariablesOffsets;
+  typedef PVGVOffsetMap::iterator pvgvo_iterator;
+  typedef PVGVOffsetMap::const_iterator pvgvo_const_iterator;
+  typedef PVGVOffsetMap::value_type pvgvo_record;
+
+private:
+  HSAIL_ASM::ItemList m_opndList;
+
+  int mBuffer;
+
+  typedef enum {
+    ARG_TYPE_NONE = 0,
+    ARG_TYPE_FUNC = 1,
+    ARG_TYPE_KERNEL = 2,
+    ARG_TYPE_POINTER = 3,
+    ARG_TYPE_VALUE = 4
+  } HSAIL_ARG_TYPE;
+
+  std::string getHSAILArgType(Type *type,
+                              HSAIL_ARG_TYPE arg_type = ARG_TYPE_NONE);
+  BrigSegment8_t getHSAILSegment(unsigned AddressSpace) const;
+  BrigSegment8_t getHSAILSegment(const GlobalVariable *gv) const;
+
+  BrigAtomicOperation getAtomicOpcode(const MachineInstr *MI) const;
+  BrigSegment getAtomicSegment(const MachineInstr *MI) const;
+  BrigMemoryOrder getAtomicOrder(const MachineInstr *MI) const;
+  BrigMemoryScope getAtomicScope(const MachineInstr *MI) const;
+  BrigType getAtomicType(const MachineInstr *MI) const;
+
+  bool canInitHSAILAddressSpace(const GlobalVariable *gv) const;
+  void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
+  // Returns the offset of the corresponding DirectiveVariable.
+  uint64_t EmitFunctionArgument(Type *type, bool isKernel,
+                                const StringRef argName, bool isSExt);
+  void EmitFunctionReturn(Type *type, bool isKernel, StringRef RetName,
+                          bool isSExt);
+
+  bool usesGCNAtomicCounter(void);
+
+  HSAIL_ASM::OperandRegister getBrigReg(MachineOperand s);
+
+  HSAIL_ASM::DirectiveVariable EmitLocalVariable(const GlobalVariable *GV,
+                                                 BrigSegment8_t segment);
+
+  BrigAlignment8_t getBrigAlignment(unsigned align_value);
+
+  HSAIL_ASM::Inst EmitInstructionImpl(const MachineInstr *);
+};
+
+}
+
+#endif // _BRIG_ASM_PRINTER_H_
Index: lib/Target/HSAIL/BRIGAsmPrinter/BRIGAsmPrinter.cpp
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/BRIGAsmPrinter/BRIGAsmPrinter.cpp
@@ -0,0 +1,1945 @@
+//===-- BRIGAsmPrinter.cpp - BRIG object emitter via libHSAIL -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +/// \file +/// +/// The BRIGAsmPrinter is used to emit BRIG object code or HSAIL text (via +/// dissembling) using libHSAIL +// +//===----------------------------------------------------------------------===// +// + +#include "BRIGAsmPrinter.h" +#include "HSAILKernelManager.h" +#include "HSAILOpaqueTypes.h" +#include "HSAILStoreInitializer.h" +#include "HSAILTargetMachine.h" +#include "HSAILUtilityFunctions.h" +#include "InstPrinter/HSAILInstPrinter.h" +#include "MCTargetDesc/BRIGDwarfStreamer.h" + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCValue.h" + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/TargetRegistry.h" + +#include "../lib/CodeGen/AsmPrinter/DwarfDebug.h" + +#include "LibHSAILAdapters.h" + +#include "libHSAIL/HSAILDisassembler.h" +#include "libHSAIL/HSAILDump.h" +#include "libHSAIL/HSAILParser.h" +#include "libHSAIL/HSAILValidator.h" + +#include +#include +#include + +using namespace llvm; + +static cl::opt DebugInfoFilename("odebug", + cl::desc("Debug Info filename"), + cl::value_desc("filename"), + cl::init("")); + +static cl::opt DumpOnFailFilename( + "dumpOnFail", + cl::desc("Filename for the BRIG container dump if validation failed"), + cl::value_desc("filename"), cl::init("")); + +static cl::opt + DisableValidator("disable-validator", + cl::desc("Disable validation of the BRIG container"), + cl::init(false), cl::Hidden); + +static HSAIL_ASM::SRef makeSRef(const SmallVectorImpl &Str) { + return HSAIL_ASM::SRef(Str.begin(), Str.end()); +} + +static HSAIL_ASM::SRef makeSRef(StringRef Str) { + return HSAIL_ASM::SRef(Str.begin(), Str.end()); +} + +BrigAtomicOperation +BRIGAsmPrinter::getAtomicOpcode(const MachineInstr *MI) const { + int64_t Val = TII->getNamedModifierOperand(*MI, HSAIL::OpName::op); + assert(Val >= BRIG_ATOMIC_ADD && Val <= BRIG_ATOMIC_XOR); + return static_cast(Val); +} + +BrigSegment BRIGAsmPrinter::getAtomicSegment(const MachineInstr *MI) const { + int64_t Val = TII->getNamedModifierOperand(*MI, HSAIL::OpName::segment); + assert(Val > 0 && Val < BRIG_SEGMENT_AMD_GCN); + return static_cast(Val); +} + +BrigMemoryOrder BRIGAsmPrinter::getAtomicOrder(const MachineInstr *MI) const { + int64_t Val = TII->getNamedModifierOperand(*MI, HSAIL::OpName::order); + assert(Val > 0 && Val <= BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE); + return static_cast(Val); +} + +BrigMemoryScope BRIGAsmPrinter::getAtomicScope(const MachineInstr *MI) const { + int64_t Val = TII->getNamedModifierOperand(*MI, HSAIL::OpName::scope); + assert(Val > 0 && Val <= BRIG_MEMORY_SCOPE_SYSTEM); + return static_cast(Val); +} + +BrigType BRIGAsmPrinter::getAtomicType(const MachineInstr *MI) const { + int Val = TII->getNamedModifierOperand(*MI, HSAIL::OpName::TypeLength); + switch (Val) { + case BRIG_TYPE_B32: + case BRIG_TYPE_S32: + case BRIG_TYPE_U32: + case BRIG_TYPE_B64: + case BRIG_TYPE_S64: + case BRIG_TYPE_U64: + return static_cast(Val); + default: + llvm_unreachable("Unknown BrigType"); + } +} + +// FIXME: Doesn't make sense to rely on address space for this. +char BRIGAsmPrinter::getSymbolPrefixForAddressSpace(unsigned AS) { + return (AS == HSAILAS::GROUP_ADDRESS || AS == HSAILAS::PRIVATE_ADDRESS) ? 
'%' + : '&'; +} + +char BRIGAsmPrinter::getSymbolPrefix(const MCSymbol &Sym) const { + const GlobalVariable *GV = MMI->getModule()->getNamedGlobal(Sym.getName()); + assert(GV && "Need prefix for undefined GlobalVariable"); + + unsigned AS = GV->getType()->getAddressSpace(); + return getSymbolPrefixForAddressSpace(AS); +} + +void BRIGAsmPrinter::BrigEmitInitVarWithAddressPragma(StringRef VarName, + uint64_t BaseOffset, + const MCExpr *Expr, + unsigned EltSize) { + SmallString<256> InitStr; + raw_svector_ostream O(InitStr); + + MCValue Val; + bool Res = Expr->EvaluateAsRelocatable(Val, nullptr, nullptr); + (void)Res; + assert(Res && "Could not evaluate MCExpr"); + assert(!Val.getSymB() && "Multi-symbol expressions not handled"); + + const MCSymbol &Sym = Val.getSymA()->getSymbol(); + + O << "initvarwithaddress:" << VarName << ':' + << BaseOffset // Offset into the destination. + << ':' << EltSize << ':' << getSymbolPrefix(Sym) << Sym.getName() << ':' + << Val.getConstant(); // Offset of the symbol being written. + + HSAIL_ASM::DirectivePragma pgm = + brigantine.append(); + + HSAIL_ASM::ItemList opnds; + + opnds.push_back(brigantine.createOperandString(makeSRef(O.str()))); + pgm.operands() = opnds; +} + +void BRIGAsmPrinter::BrigEmitGlobalInit(HSAIL_ASM::DirectiveVariable globalVar, + Type *EltTy, Constant *CV) { + if (isa(CV)) // Don't emit anything for undefined initializers. + return; + + BrigType EltBT = static_cast(globalVar.type() & ~BRIG_TYPE_ARRAY); + + size_t typeBytes = HSAIL_ASM::getBrigTypeNumBytes(EltBT); + + bool isArray = globalVar.type() & BRIG_TYPE_ARRAY; + // If this is a trivially null constant, we only need to emit one zero. + if (CV->isNullValue()) { + unsigned NElts = globalVar.dim(); + if (NElts == 0) + NElts = 1; + + uint64_t Size = NElts * typeBytes; + std::unique_ptr Zeros(new char[Size]()); + + // FIXME: Should not have to allocate a zero array for this. + HSAIL_ASM::SRef init(Zeros.get(), Zeros.get() + Size); + globalVar.init() = + brigantine.createOperandConstantBytes(init, EltBT, isArray); + return; + } + + unsigned EltSize = HSAIL_ASM::getBrigTypeNumBytes(EltBT); + + auto Name = globalVar.name().str(); + + StoreInitializer store(EltTy, *this); + store.append(CV, Name); + + if (store.elementCount() > 0) { + globalVar.init() = brigantine.createOperandConstantBytes( + makeSRef(store.str()), EltBT, isArray); + } else { + uint64_t Size = globalVar.dim() * typeBytes; + std::unique_ptr Zeros(new char[Size]()); + + HSAIL_ASM::SRef init(Zeros.get(), Zeros.get() + Size); + globalVar.init() = + brigantine.createOperandConstantBytes(init, EltBT, isArray); + } + + for (const auto &VarInit : store.varInitAddresses()) { + BrigEmitInitVarWithAddressPragma(Name, VarInit.BaseOffset, VarInit.Expr, + EltSize); + } +} + +BRIGAsmPrinter::BRIGAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : HSAILAsmPrinter(TM, std::move(Streamer)), + Subtarget(nullptr), FuncArgsStr(), + FuncRetValStr(), retValCounter(0), paramCounter(0), + mTM(reinterpret_cast(&TM)), + TII(nullptr), mMeta(new HSAILKernelManager(mTM)), + mMFI(nullptr), m_bIsKernel(false), brigantine(bc), mDwarfStream(nullptr), + mBrigStream(nullptr), mDwarfFileStream(nullptr), + mBuffer(0) { + // Obtain DWARF stream. + BRIGDwarfStreamer &DwarfStreamer = cast(*OutStreamer); + mDwarfStream = DwarfStreamer.getDwarfStream(); + + // Obtain stream for streaming BRIG that came from llc. + mBrigStream = mDwarfStream->getOtherStream(); + + // Disconnect DWARF stream from BRIG stream. 
+ mDwarfStream->releaseStream(); + + if (DebugInfoFilename.size() > 0) { + std::error_code err; + mDwarfFileStream = + new raw_fd_ostream(DebugInfoFilename.c_str(), err, sys::fs::F_Text); + mDwarfStream->setOtherStream(mDwarfFileStream); + } +} + +BRIGAsmPrinter::~BRIGAsmPrinter() { + delete mMeta; + delete mDwarfStream; + delete mDwarfFileStream; +} + +BrigSegment8_t BRIGAsmPrinter::getHSAILSegment(unsigned AddressSpace) const { + switch (AddressSpace) { + case HSAILAS::PRIVATE_ADDRESS: + return BRIG_SEGMENT_PRIVATE; + case HSAILAS::GLOBAL_ADDRESS: + return BRIG_SEGMENT_GLOBAL; + case HSAILAS::READONLY_ADDRESS: + return BRIG_SEGMENT_READONLY; + case HSAILAS::GROUP_ADDRESS: + return BRIG_SEGMENT_GROUP; + case HSAILAS::FLAT_ADDRESS: + return BRIG_SEGMENT_FLAT; + case HSAILAS::REGION_ADDRESS: + return BRIG_SEGMENT_AMD_GCN; + case HSAILAS::KERNARG_ADDRESS: + return BRIG_SEGMENT_KERNARG; + case HSAILAS::ARG_ADDRESS: + return BRIG_SEGMENT_ARG; + case HSAILAS::SPILL_ADDRESS: + return BRIG_SEGMENT_SPILL; + } + llvm_unreachable("Unexpected BRIG address space value"); +} + +BrigSegment8_t BRIGAsmPrinter::getHSAILSegment(const GlobalVariable *gv) const { + return getHSAILSegment(gv->getType()->getAddressSpace()); +} + +bool BRIGAsmPrinter::canInitHSAILAddressSpace(const GlobalVariable *gv) const { + bool canInit; + switch (gv->getType()->getAddressSpace()) { + case HSAILAS::GLOBAL_ADDRESS: + case HSAILAS::READONLY_ADDRESS: + canInit = true; + break; + default: + canInit = false; + break; + } + return canInit; +} + +static BrigLinkage findGlobalBrigLinkage(const GlobalValue &GV) { + switch (GV.getLinkage()) { + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::CommonLinkage: + return BRIG_LINKAGE_MODULE; + + case GlobalValue::ExternalLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::ExternalWeakLinkage: + case GlobalValue::AppendingLinkage: + return BRIG_LINKAGE_PROGRAM; + } + + llvm_unreachable("Invalid linkage type"); +} + +static unsigned getGVAlignment(const GlobalVariable &GV, const DataLayout &DL, + Type *InitTy, Type *EltTy, unsigned NElts, + bool IsLocal) { + unsigned Alignment = GV.getAlignment(); + if (Alignment == 0) + Alignment = DL.getPrefTypeAlignment(InitTy); + else { + // If an alignment is specified, it must be equal to or greater than the + // variable's natural alignment. + + unsigned NaturalAlign = IsLocal ? DL.getPrefTypeAlignment(EltTy) + : DL.getABITypeAlignment(EltTy); + Alignment = std::max(Alignment, NaturalAlign); + } + + // Align arrays at least by 4 bytes. + if (Alignment < 4 && NElts != 0) + Alignment = 4; + + return Alignment; +} + +/// EmitGlobalVariable - Emit the specified global variable to the .s file. +void BRIGAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + if (HSAIL::isIgnoredGV(GV)) + return; + + const DataLayout &DL = getDataLayout(); + + SmallString<256> NameStr; + getHSAILMangledName(NameStr, GV); + + // Initializer has pointer element type. 
+ Type *InitTy = GV->getType()->getElementType(); + + unsigned NElts = 0; + Type *EltTy = HSAIL::analyzeType(InitTy, NElts, DL); + + HSAIL_ASM::DirectiveVariable globalVar; + // TODO_HSA: pending BRIG_LINKAGE_STATIC implementation in the Finalizer + if (NElts == 0) { + globalVar = brigantine.addVariable(makeSRef(NameStr), getHSAILSegment(GV), + HSAIL::getBrigType(EltTy, DL)); + } else { + globalVar = brigantine.addArrayVariable(makeSRef(NameStr), NElts, + getHSAILSegment(GV), + HSAIL::getBrigType(EltTy, DL)); + } + + globalVar.linkage() = findGlobalBrigLinkage(*GV); + globalVar.allocation() = BRIG_ALLOCATION_AGENT; + globalVar.modifier().isDefinition() = 1; + globalVar.dim() = NElts; + + unsigned Align = getGVAlignment(*GV, DL, InitTy, EltTy, NElts, false); + globalVar.align() = getBrigAlignment(Align); + + globalVariableOffsets[GV] = globalVar.brigOffset(); + + // TODO_HSA: if group memory has initializer, then emit instructions to + // initialize dynamically + if (GV->hasInitializer() && canInitHSAILAddressSpace(GV)) { + BrigEmitGlobalInit(globalVar, EltTy, (Constant *)GV->getInitializer()); + } +} + +/// Returns true if StringRef is LLVM intrinsic function that define a mapping +/// between LLVM program objects and the source-level objects. +/// See http://llvm.org/docs/SourceLevelDebugging.html#format_common_intrinsics +/// for more details. +static bool isLLVMDebugIntrinsic(StringRef str) { + return str.equals("llvm.dbg.declare") || str.equals("llvm.dbg.value"); +} + +// Emit a declaration of function F, optionally using the name of alias GA. +void BRIGAsmPrinter::EmitFunctionLabel(const Function &F, + const GlobalAlias *GA) { + if (isLLVMDebugIntrinsic(F.getName())) { + return; // Nothing to do with LLVM debug-related intrinsics + } + + FunctionType *funcType = F.getFunctionType(); + Type *retType = funcType->getReturnType(); + + SmallString<256> Name; + + if (GA) + getHSAILMangledName(Name, GA); + else + getHSAILMangledName(Name, &F); + + HSAIL_ASM::DirectiveFunction fx = brigantine.declFunc(makeSRef(Name)); + // TODO_HSA: pending BRIG_LINKAGE_STATIC implementation in the Finalizer + fx.linkage() = findGlobalBrigLinkage(F); + + const auto &Attrs = F.getAttributes(); + + paramCounter = 0; + if (!retType->isVoidTy()) { + bool IsSExt = + Attrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt); + bool IsZExt = + Attrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + + if (IsSExt || IsZExt) { + EmitFunctionReturn(Type::getInt32Ty(retType->getContext()), false, "ret", + IsSExt); + } else + EmitFunctionReturn(retType, false, "ret", IsSExt); + } + + // Loop through all of the parameters and emit the types and + // corresponding names. 
+ Function::const_arg_iterator ai = F.arg_begin(); + Function::const_arg_iterator ae = F.arg_end(); + unsigned n = 1; + + for (FunctionType::param_iterator pb = funcType->param_begin(), + pe = funcType->param_end(); + pb != pe; ++pb, ++ai, ++n) { + assert(ai != ae); + Type *type = *pb; + bool IsSExt = Attrs.hasAttribute(n, Attribute::SExt); + EmitFunctionArgument(type, false, ai->getName(), IsSExt); + } +} + +//===------------------------------------------------------------------===// +// Overridable Hooks +//===------------------------------------------------------------------===// + +/** + * + * + * @param lMF MachineFunction to print the assembly for + * @brief parse the specified machineModel function and print + * out the assembly for all the instructions in the function + * + * @return + */ + +bool BRIGAsmPrinter::runOnMachineFunction(MachineFunction &lMF) { + this->MF = &lMF; + mMeta->setMF(&lMF); + mMFI = lMF.getInfo(); + + Subtarget = &lMF.getSubtarget(); + TII = Subtarget->getInstrInfo(); + + + SetupMachineFunction(lMF); + const Function *F = MF->getFunction(); + OutStreamer->SwitchSection( + getObjFileLowering().SectionForGlobal(F, *Mang, TM)); + m_bIsKernel = HSAIL::isKernelFunc(MF->getFunction()); + mMeta->printHeader(MF->getFunction()->getName()); + + // The need to define global samplers is discovered during instruction + // selection, + // so we emit them at file scope just before a kernel function is emitted. + Subtarget->getImageHandles()->finalize(); + EmitSamplerDefs(); + + EmitFunctionBody(); + + // Clear local handles from image handles + Subtarget->getImageHandles()->clearImageArgs(); + + return false; +} + +void BRIGAsmPrinter::EmitSamplerDefs() { + + HSAILImageHandles *handles = Subtarget->getImageHandles(); + SmallVector samplers = handles->getSamplerHandles(); + + // Emit global sampler defs + for (unsigned i = 0; i < samplers.size(); i++) { + // All sampler defs (samplers with initializers) are global, so we emit + // them only once. + if (!samplers[i]->isEmitted()) { + + HSAIL_ASM::DirectiveVariable samplerVar = brigantine.addSampler( + "&" + samplers[i]->getSym(), + samplers[i]->isRO() ? BRIG_SEGMENT_READONLY : BRIG_SEGMENT_GLOBAL); + samplerVar.align() = BRIG_ALIGNMENT_8; + samplerVar.allocation() = BRIG_ALLOCATION_AGENT; + samplerVar.linkage() = BRIG_LINKAGE_MODULE; + samplerVar.modifier().isDefinition() = 1; + HSAIL_ASM::OperandConstantSampler samplerProps = + brigantine.append(); + // HSAIL_ASM::ItemList samplerInit; + // samplerInit.push_back(samplerProps); + samplerVar.init() = samplerProps; + + int ocl_init = handles->getSamplerValue(i); + + samplerProps.coord() = + (ocl_init & 0x1) ? 
BRIG_COORD_NORMALIZED : BRIG_COORD_UNNORMALIZED; + + switch (ocl_init & 0x30) { + default: + case 0x10: + samplerProps.filter() = BRIG_FILTER_NEAREST; // CLK_FILTER_NEAREST + break; + case 0x20: + samplerProps.filter() = BRIG_FILTER_LINEAR; // CLK_FILTER_LINEAR + break; + } + + switch (ocl_init & 0xE) { + case 0x0: + samplerProps.addressing() = BRIG_ADDRESSING_UNDEFINED; + break; // CLK_ADDRESS_NONE + case 0x2: + samplerProps.addressing() = BRIG_ADDRESSING_REPEAT; + break; // CLK_ADDRESS_REPEAT + case 0x4: + samplerProps.addressing() = BRIG_ADDRESSING_CLAMP_TO_EDGE; + break; // CLK_ADDRESS_CLAMP_TO_EDGE + case 0x6: + samplerProps.addressing() = BRIG_ADDRESSING_CLAMP_TO_BORDER; + break; // CLK_ADDRESS_CLAMP + case 0x8: + samplerProps.addressing() = BRIG_ADDRESSING_MIRRORED_REPEAT; + break; // CLK_ADDRESS_MIRRORED_REPEAT + } + + samplers[i]->setEmitted(); + } + } +} + +void BRIGAsmPrinter::emitMacroFunc(const MachineInstr *MI, raw_ostream &O) { + StringRef nameRef; + nameRef = MI->getOperand(0).getGlobal()->getName(); + if (nameRef.startswith("barrier")) { + O << '\t'; + O << nameRef; + O << ';'; + return; + } +} + +void BRIGAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { + std::string StrStorage; + raw_string_ostream o(StrStorage); + bool insert_spaces = false; + + if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) { + o << "// BB#" << MBB.getNumber() << ":"; + insert_spaces = true; + } else { + StringRef name = MBB.getSymbol()->getName(); + brigantine.addLabel(makeSRef(name)); + } + + if (const BasicBlock *BB = MBB.getBasicBlock()) { + if (BB->hasName()) { + if (insert_spaces) + o << " "; + o << "// %" << BB->getName(); + } + } + + const std::string &Str = o.str(); + if (!Str.empty()) + brigantine.addComment(Str.c_str()); + + AsmPrinter::EmitBasicBlockStart(MBB); +} + +namespace { +class autoCodeEmitter { + MCStreamer &streamer; + const HSAIL_ASM::Brigantine &brigantine; + uint64_t lowpc; + uint64_t hipc; + +public: + autoCodeEmitter(MCStreamer &strm, const HSAIL_ASM::Brigantine &brig) + : streamer(strm), brigantine(brig) { + lowpc = brigantine.container().code().size(); + } + + ~autoCodeEmitter() { + hipc = brigantine.container().code().size(); + streamer.SwitchSection( + streamer.getContext().getObjectFileInfo()->getTextSection()); + assert(lowpc <= hipc); + // This is the only way to adjust the size of virtual ELF section + // (type SHT_NOBITS) like .brigcode + streamer.EmitZeros(hipc - lowpc); + } +}; +} + +void BRIGAsmPrinter::EmitInstruction(const MachineInstr *II) { + m_opndList.clear(); + HSAIL_ASM::Inst inst = EmitInstructionImpl(II); + if (inst) { + inst.operands() = m_opndList; + } +} + +HSAIL_ASM::Inst BRIGAsmPrinter::EmitInstructionImpl(const MachineInstr *II) { + // autoCodeEmitter will emit required amount of bytes in corresponding + // MCSection + autoCodeEmitter ace(*OutStreamer, brigantine); + + unsigned Opc = II->getOpcode(); + uint16_t BrigOpc = TII->getBrigOpcode(Opc); + + if (TII->isInstBasic(Opc)) + return BrigEmitInstBasic(*II, BrigOpc); + + if (TII->isInstMod(Opc)) { + // FIXME: Some instructions are available as InstBasic if they don't use + // modifiers. 
+ return BrigEmitInstMod(*II, BrigOpc); + } + + if (TII->isInstCmp(Opc)) + return BrigEmitInstCmp(*II, BrigOpc); + + if (TII->isInstMem(Opc)) + return BrigEmitInstMem(*II, BrigOpc); + + if (TII->isInstCvt(Opc)) + return BrigEmitInstCvt(*II, BrigOpc); + + if (TII->isInstSourceType(Opc)) + return BrigEmitInstSourceType(*II, BrigOpc); + + if (TII->isInstBr(Opc)) + return BrigEmitInstBr(*II, BrigOpc); + + if (TII->isInstMemFence(Opc)) + return BrigEmitInstMemFence(*II, BrigOpc); + + if (TII->isInstAtomic(Opc)) + return BrigEmitInstAtomic(*II, BrigOpc); + + if (TII->isInstImage(Opc)) + return BrigEmitInstImage(*II, BrigOpc); + + if (TII->isInstAddr(Opc)) + return BrigEmitInstAddr(*II, BrigOpc); + + if (TII->isInstLane(Opc)) + return BrigEmitInstLane(*II, BrigOpc); + + if (TII->isInstSeg(Opc)) + return BrigEmitInstSeg(*II, BrigOpc); + + if (TII->isInstSegCvt(Opc)) + return BrigEmitInstSegCvt(*II, BrigOpc); + + switch (II->getOpcode()) { + case HSAIL::RET: + return brigantine.addInst(BRIG_OPCODE_RET, + BRIG_TYPE_NONE); + + case HSAIL::ARG_SCOPE_START: + brigantine.startArgScope(); + return HSAIL_ASM::Inst(); + + case HSAIL::ARG_SCOPE_END: + brigantine.endArgScope(); + return HSAIL_ASM::Inst(); + + case HSAIL::CALL: { + MachineInstr::const_mop_iterator oi = II->operands_begin(); + MachineInstr::const_mop_iterator oe = II->operands_end(); + const GlobalValue *gv = (oi++)->getGlobal(); + + // Place a call + HSAIL_ASM::InstBr call = + brigantine.addInst(BRIG_OPCODE_CALL, BRIG_TYPE_NONE); + call.width() = BRIG_WIDTH_ALL; + + HSAIL_ASM::ItemList ret_list; + for (; oi != oe && oi->isSymbol(); ++oi) { + std::string ret("%"); + ret += oi->getSymbolName(); + ret_list.push_back( + brigantine.findInScopes(ret)); + } + + // Return value and argument symbols are delimited with a 0 value. + assert((oi->isImm() && (oi->getImm() == 0)) || + !"Unexpected target call instruction operand list!"); + + HSAIL_ASM::ItemList call_paramlist; + for (++oi; oi != oe; ++oi) { + if (oi->isSymbol()) { + std::string op("%"); + op += oi->getSymbolName(); + call_paramlist.push_back( + brigantine.findInScopes(op)); + } else { + llvm_unreachable("Unexpected target call instruction operand list!"); + } + } + + SmallString<256> Name; + getHSAILMangledName(Name, gv); + + m_opndList.push_back(brigantine.createCodeList(ret_list)); + m_opndList.push_back(brigantine.createExecutableRef(makeSRef(Name))); + m_opndList.push_back(brigantine.createCodeList(call_paramlist)); + + return call; + } + case HSAIL::ARG_DECL: + BrigEmitVecArgDeclaration(II); + return HSAIL_ASM::Inst(); + default: + llvm_unreachable("unhandled instruction"); + } +} + +bool BRIGAsmPrinter::doFinalization(Module &M) { + +#if 0 + if (getDwarfDebug()) { + // NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + // Adjust size of fake .brigdirectives section to match actual size of + // BRIG .directives section + OutStreamer->SwitchSection(OutStreamer->getContext().getObjectFileInfo()-> + getDataSection()); + OutStreamer->EmitZeros(brigantine.container().directives().size(), 0); + // This is not needed at this time, because dwarflinker expects + // .brigdirectives size to be zero + DwarfDebug *mDD = getDwarfDebug(); + mDD->endModule(); + delete mDD; + setDwarfDebug(nullptr); + } +#endif + + // LLVM Bug 9761. Nothing should be emitted after EmitEndOfAsmFile() + OutStreamer->FinishImpl(); + + // Allow the target to emit any magic that it wants at the end of the file, + // after everything else has gone out. 
+ EmitEndOfAsmFile(M); + + return false; +} + +void BRIGAsmPrinter::EmitStartOfAsmFile(Module &M) { + // Clear global variable map. + globalVariableOffsets.clear(); + + + const DataLayout &DL = getDataLayout(); + bool IsLargeModel = (DL.getPointerSize(HSAILAS::GLOBAL_ADDRESS) == 8); + + bool IsFullProfile, IsGCN, HasImages; + checkModuleSubtargetExtensions(M, IsFullProfile, IsGCN, HasImages); + + brigantine.startProgram(); + brigantine.module("&__llvm_hsail_module", BRIG_VERSION_HSAIL_MAJOR, + BRIG_VERSION_HSAIL_MINOR, + IsLargeModel ? BRIG_MACHINE_LARGE : BRIG_MACHINE_SMALL, + IsFullProfile ? BRIG_PROFILE_FULL : BRIG_PROFILE_BASE, + BRIG_ROUND_FLOAT_NEAR_EVEN); + + if (IsGCN) + brigantine.addExtension("amd:gcn"); + + if (HasImages) + brigantine.addExtension("IMAGE"); + + // If we are emitting first instruction that occupied some place in BRIG + // we should also emit 4 reserved bytes to the MCSection, so that offsets + // of instructions are the same in the BRIG .code section and MCSection + OutStreamer->SwitchSection( + OutStreamer->getContext().getObjectFileInfo()->getTextSection()); + OutStreamer->EmitZeros( + brigantine.container().code().secHeader()->headerByteCount); + + for (GlobalAlias &GA : M.aliases()) { + if (const Function *F = dyn_cast(GA.getAliasee())) + EmitFunctionLabel(*F, &GA); + else if (isa(GA.getAliasee())) { + llvm_unreachable("Use of alias globals not yet implemented"); + } else + llvm_unreachable("Unhandled alias type"); + } + + for (const GlobalVariable &GV : M.globals()) + EmitGlobalVariable(&GV); + + // Emit function declarations. + for (const Function &F : M.functions()) { + // No declaration for kernels or intrinsics. + if (F.isIntrinsic() || HSAIL::isKernelFunc(&F) || + isHSAILInstrinsic(F.getName())) + continue; + + EmitFunctionLabel(F, nullptr); + } +} + +void BRIGAsmPrinter::EmitEndOfAsmFile(Module &M) { + brigantine.endProgram(); + // Clear global variable map + globalVariableOffsets.clear(); + if (mDwarfStream) { + // Flush all DWARF data captured + mDwarfStream->flush(); + // Stop writing to another stream, if any provided + mDwarfStream->releaseStream(); + // Actual size of captured DWARF data may be less than the size of + // mDwarfStream's internal buffer + const uint64_t dwarfDataSize = mDwarfStream->tell(); + assert(dwarfDataSize && "No DWARF data!"); // sanity check + if (MMI->hasDebugInfo()) { + // Obtain reference to data block + HSAIL_ASM::SRef data = makeSRef(mDwarfStream->getData()); + // \todo1.0 get rid of data copying, stream directly into brig section + brigantine.container().initSectionRaw( + BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED, "hsa_debug"); + HSAIL_ASM::BrigSectionImpl §ion = brigantine.container().sectionById( + BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED); + section.insertData(section.size(), data.begin, data.end); + } + } + + // optimizeOperands is not functional as of now + // bc.optimizeOperands(); + HSAIL_ASM::Validator vld(bc); + + bool isValid = true; + if (!DisableValidator) { + isValid = vld.validate(); + } + + if (!isValid) { + errs() << vld.getErrorMsg(nullptr) << '\n'; + // HSAIL_ASM::dump(bc); + if (DumpOnFailFilename.size() > 0) { + std::string info; + + std::ofstream dumpStream(DumpOnFailFilename.c_str()); + HSAIL_ASM::dump(bc, dumpStream); + } + report_fatal_error( + "\n Brig container validation has failed in BRIGAsmPrinter.cpp\n"); + return; + } + + if (mBrigStream) { + if (mTM->HSAILFileType == TargetMachine::CGFT_ObjectFile) { + // Common case + // TBD err stream + RawOstreamWriteAdapter 
brigAdapter(*mBrigStream, std::cerr); + HSAIL_ASM::BrigIO::save(bc, HSAIL_ASM::FILE_FORMAT_BRIG, brigAdapter); + } else { + HSAIL_ASM::Disassembler disasm(bc); + disasm.log(std::cerr); // TBD err stream + // TBD this is incredibly inefficient + std::stringstream ss; + int result = disasm.run(ss); + if (result) { + assert(!"disasm should not fail if container was validated above"); + } + const std::string &s = ss.str(); + if (!s.empty()) { + mBrigStream->write(s.data(), s.size()); + } + } + } else { + HSAIL_ASM::BrigStreamer::save(bc, "test_output.brig"); + } +} + +HSAIL_ASM::DirectiveVariable +BRIGAsmPrinter::EmitLocalVariable(const GlobalVariable *GV, + BrigSegment8_t segment) { + const DataLayout &DL = getDataLayout(); + + SmallString<256> NameStr; + getHSAILMangledName(NameStr, GV); + + Type *InitTy = GV->getType()->getElementType(); + + unsigned NElts = 0; + Type *EltTy = HSAIL::analyzeType(InitTy, NElts, DL); + unsigned Align = getGVAlignment(*GV, DL, InitTy, EltTy, NElts, true); + + HSAIL_ASM::DirectiveVariable var; + if (NElts != 0) { + BrigType BT = HSAIL::getBrigType(EltTy, DL); + var = brigantine.addArrayVariable(makeSRef(NameStr), NElts, segment, + BT & ~BRIG_TYPE_ARRAY); + } else { + var = brigantine.addVariable(makeSRef(NameStr), segment, + HSAIL::getBrigType(EltTy, getDataLayout())); + } + + var.align() = getBrigAlignment(Align); + + var.allocation() = BRIG_ALLOCATION_AUTOMATIC; + var.linkage() = BRIG_LINKAGE_FUNCTION; + var.modifier().isDefinition() = 1; + + return var; +} + +/// EmitFunctionBodyStart - Targets can override this to emit stuff before +/// the first basic block in the function. +void BRIGAsmPrinter::EmitFunctionBodyStart() { +#if 0 + DwarfDebug *mDD = getDwarfDebug(); + if (mDD) { + //NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + mDD->beginFunction(MF); + } +#endif + + brigantine.startBody(); + + const Function *F = MF->getFunction(); + + { + bool isKernel = HSAIL::isKernelFunc(F); + if (isKernel) { + // Emitting block data inside of kernel + uint32_t id = 0; + mMeta->setID(id); + mMeta->setKernel(isKernel); + ++mBuffer; + // Preserved this ostream for compatibility only + std::string ignored_FunStr; + raw_string_ostream ignored_OFunStr(ignored_FunStr); + formatted_raw_ostream ignored(ignored_OFunStr); + // D2 does not need to report kernel args info + // mMeta->processArgMetadata(ignored, mBuffer, isKernel); + // We have to call processArgMetadata with ostream before we can emit + // something + mMeta->brigEmitMetaData(brigantine, id, isKernel); + } + } + + // Clear the lists of group variables + groupVariablesOffsets.clear(); + + // Record private/group variable references + for (MachineFunction::const_iterator block = MF->begin(), + endBlock = MF->end(); + block != endBlock; ++block) { + for (MachineBasicBlock::const_iterator inst = block->begin(), + instEnd = block->end(); + inst != instEnd; ++inst) { + const MachineInstr *MI = inst; + for (unsigned int opNum = 0; opNum < MI->getNumOperands(); opNum++) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_GlobalAddress) { + if (const GlobalVariable *GV = + dyn_cast(MO.getGlobal())) { + switch (GV->getType()->getAddressSpace()) { + case HSAILAS::GROUP_ADDRESS: + groupVariablesOffsets.insert(std::make_pair(GV, 0)); + break; + } + } + } + } + } + } + + // Emit recorded + for (Module::const_global_iterator I = F->getParent()->global_begin(), + E = F->getParent()->global_end(); + I != E; ++I) { + pvgvo_iterator II = 
groupVariablesOffsets.find(I); + if (II != groupVariablesOffsets.end()) { + HSAIL_ASM::DirectiveVariable var = + EmitLocalVariable(II->first, BRIG_SEGMENT_GROUP); + + II->second = var.brigOffset(); + } + } + + const MachineFrameInfo *MFI = MF->getFrameInfo(); + + uint64_t SpillSize, PrivateSize; + unsigned PrivateAlign, SpillAlign; + computeStackUsage(MFI, PrivateSize, PrivateAlign, SpillSize, SpillAlign); + + if (PrivateSize != 0) { + HSAIL_ASM::DirectiveVariable PrivateStack = + brigantine.addArrayVariable("%__privateStack", PrivateSize, + BRIG_SEGMENT_PRIVATE, BRIG_TYPE_U8); + PrivateStack.align() = getBrigAlignment(PrivateAlign); + PrivateStack.allocation() = BRIG_ALLOCATION_AUTOMATIC; + PrivateStack.linkage() = BRIG_LINKAGE_FUNCTION; + PrivateStack.modifier().isDefinition() = 1; + } + + if (SpillSize != 0) { + HSAIL_ASM::DirectiveVariable SpillStack = brigantine.addArrayVariable( + "%__spillStack", SpillSize, BRIG_SEGMENT_SPILL, BRIG_TYPE_U8); + SpillStack.align() = getBrigAlignment(SpillAlign); + SpillStack.allocation() = BRIG_ALLOCATION_AUTOMATIC; + SpillStack.linkage() = BRIG_LINKAGE_FUNCTION; + SpillStack.modifier().isDefinition() = 1; + } + + const HSAILMachineFunctionInfo *Info = MF->getInfo(); + if (Info->hasScavengerSpill()) { + HSAIL_ASM::DirectiveVariable SpillScavenge = + brigantine.addVariable("%___spillScavenge", + BRIG_SEGMENT_SPILL, BRIG_TYPE_U32); + SpillScavenge.align() = getBrigAlignment(4); + SpillScavenge.allocation() = BRIG_ALLOCATION_AUTOMATIC; + SpillScavenge.linkage() = BRIG_LINKAGE_FUNCTION; + SpillScavenge.modifier().isDefinition() = 1; + } + + retValCounter = 0; + paramCounter = 0; + +#if 0 + if (usesGCNAtomicCounter()) { + HSAIL_ASM::InstBase gcn_region = brigantine.addInst( + BRIG_OPCODE_GCNREGIONALLOC); + brigantine.appendOperand(gcn_region, brigantine.createImmed(4, + BRIG_TYPE_B32)); + } +#endif +} + +void BRIGAsmPrinter::EmitFunctionBodyEnd() { + autoCodeEmitter ace(*OutStreamer, brigantine); + brigantine.endBody(); +} + +void BRIGAsmPrinter::EmitFunctionReturn(Type *Ty, bool IsKernel, + StringRef RetName, bool IsSExt) { + std::string SymName("%"); + SymName += RetName; + + assert((!Ty->isVectorTy() || !Ty->getScalarType()->isIntegerTy(1)) && + "i1 vectors do not work"); + + const DataLayout &DL = getDataLayout(); + + unsigned NElts = ~0u; + Type *EmitTy = HSAIL::analyzeType(Ty, NElts, DL); + + // Construct return symbol. + HSAIL_ASM::DirectiveVariable RetParam; + if (NElts != 0) { + RetParam = brigantine.addArrayVariable( + SymName, NElts, BRIG_SEGMENT_ARG, + HSAIL::getBrigType(EmitTy, DL, IsSExt)); + } else { + RetParam = brigantine.addVariable(SymName, BRIG_SEGMENT_ARG, + HSAIL::getBrigType(EmitTy, DL, IsSExt)); + } + + RetParam.align() = getBrigAlignment(DL.getABITypeAlignment(Ty)); + brigantine.addOutputParameter(RetParam); +} + +uint64_t BRIGAsmPrinter::EmitFunctionArgument(Type *Ty, bool IsKernel, + StringRef ArgName, + bool IsSExt) { + std::string Name; + { + raw_string_ostream Stream(Name); + + if (ArgName.empty()) + Stream << "%arg_p" << paramCounter; + else + Stream << '%' << HSAILParamManager::mangleArg(Mang, ArgName); + } + + paramCounter++; + + const BrigSegment8_t SymSegment = + IsKernel ? BRIG_SEGMENT_KERNARG : BRIG_SEGMENT_ARG; + + HSAIL_ASM::DirectiveVariable Sym; + + OpaqueType OT = GetOpaqueType(Ty); + + // Create the symbol. 
+ if (IsImage(OT)) { + Sym = brigantine.addImage(Name, SymSegment); + Sym.align() = BRIG_ALIGNMENT_8; + } else if (OT == Sampler) { + Sym = brigantine.addSampler(Name, SymSegment); + Sym.align() = BRIG_ALIGNMENT_8; + } else { + const DataLayout &DL = getDataLayout(); + + assert((!Ty->isVectorTy() || !Ty->getScalarType()->isIntegerTy(1)) && + "i1 vectors are broken"); + + unsigned NElts = ~0u; + Type *EmitTy = HSAIL::analyzeType(Ty, NElts, DL); + + if (NElts != 0) { + BrigType EltTy = HSAIL::getBrigType(EmitTy, DL, IsSExt); + Sym = brigantine.addArrayVariable(Name, NElts, SymSegment, EltTy); + } else { + Sym = brigantine.addVariable(Name, SymSegment, + HSAIL::getBrigType(EmitTy, DL, IsSExt)); + } + + Sym.align() = getBrigAlignment(DL.getABITypeAlignment(Ty)); + } + + uint64_t Offset = Sym.brigOffset(); + brigantine.addInputParameter(Sym); + return Offset; +} + +/// Emit the function signature +void BRIGAsmPrinter::EmitFunctionEntryLabel() { + const Function *F = MF->getFunction(); + bool IsKernel = HSAIL::isKernelFunc(F); + const HSAILParamManager &PM = + MF->getInfo()->getParamManager(); + + SmallString<256> NameWithPrefix; + getHSAILMangledName(NameWithPrefix, F); + + HSAIL_ASM::DirectiveExecutable Directive; + if (IsKernel) + Directive = brigantine.declKernel(makeSRef(NameWithPrefix)); + else + Directive = brigantine.declFunc(makeSRef(NameWithPrefix)); + + Directive.linkage() = findGlobalBrigLinkage(*F); + + const auto &Attrs = F->getAttributes(); + + Type *RetType = F->getReturnType(); + if (!RetType->isVoidTy()) { + bool IsSExt = + Attrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt); + bool IsZExt = + Attrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + + SmallString<256> ReturnName; + getNameWithPrefix(ReturnName, F); + + if (IsSExt || IsZExt) { + EmitFunctionReturn(Type::getInt32Ty(RetType->getContext()), IsKernel, + ReturnName, IsSExt); + } else { + EmitFunctionReturn(RetType, IsKernel, ReturnName, IsSExt); + } + } + + // Loop through all of the parameters and emit the types and + // corresponding names. + paramCounter = 0; + + // Clear arguments mapping. + functionScalarArgumentOffsets.clear(); + functionVectorArgumentOffsets.clear(); + + HSAILParamManager::param_iterator AI = PM.arg_begin(); + HSAILParamManager::param_iterator AE = PM.arg_end(); + + FunctionType *FTy = F->getFunctionType(); + FunctionType::param_iterator PI = FTy->param_begin(), + PE = FTy->param_end(); + + if (IsKernel && F->hasStructRetAttr()) { + assert(PI != PE && "Invalid struct return function!"); + // If this is a struct-return function, don't process the hidden + // struct-return argument. 
+ ++AI; + ++PI; + } + + for (unsigned N = 1; PI != PE; ++PI, ++AI, ++N) { + assert(AI != AE); + + Type *Ty = *PI; + const char *ArgName = PM.getParamName(*AI); + + // Here we will store an offset of DirectiveVariable + bool IsSExt = Attrs.hasAttribute(N, Attribute::SExt); + uint64_t ArgDirectiveOffset = EmitFunctionArgument(Ty, IsKernel, ArgName, IsSExt); + functionScalarArgumentOffsets[ArgName] = ArgDirectiveOffset; + } +} + +//===------------------------------------------------------------------===// +// Dwarf Emission Helper Routines +//===------------------------------------------------------------------===// + +bool BRIGAsmPrinter::getGroupVariableOffset(const GlobalVariable *GV, + uint64_t *result) const { + pvgvo_const_iterator i = groupVariablesOffsets.find(GV); + if (i == groupVariablesOffsets.end()) { + return false; + } + *result = i->second; + return true; +} + +bool BRIGAsmPrinter::getFunctionScalarArgumentOffset(const std::string &argName, + uint64_t *result) const { + fao_iterator i = functionScalarArgumentOffsets.find(argName); + if (i == functionScalarArgumentOffsets.end()) { + return false; + } + *result = i->second; + return true; +} + +bool BRIGAsmPrinter::getFunctionVectorArgumentOffsets( + const std::string &argName, VectorArgumentOffsets &result) const { + fvo_iterator i = functionVectorArgumentOffsets.find(argName); + if (i == functionVectorArgumentOffsets.end()) { + return false; + } + result = i->second; + return true; +} + +void BRIGAsmPrinter::BrigEmitOperand(const MachineInstr *MI, unsigned opNum, + HSAIL_ASM::Inst inst) { + + int AddressIndex = + HSAIL::getNamedOperandIdx(MI->getOpcode(), HSAIL::OpName::address); + if (AddressIndex != -1) { + unsigned addrStart = AddressIndex; + if (opNum == addrStart) { + unsigned AS = TII->getNamedOperand(*MI, HSAIL::OpName::segment)->getImm(); + BrigEmitOperandLdStAddress(MI, opNum, AS); + return; + } + + // FIXME: This shouldn't be necessary + if ((opNum > addrStart) && + (opNum < addrStart + HSAILADDRESS::ADDRESS_NUM_OPS)) + // Ignore rest of address fields, already emitted. + return; + } + + const MachineOperand &MO = MI->getOperand(opNum); + + BrigType16_t const expType = HSAIL_ASM::getOperandType( + inst, m_opndList.size(), brigantine.getMachineModel(), + brigantine.getProfile()); + + switch (MO.getType()) { + case MachineOperand::MO_Register: + m_opndList.push_back(getBrigReg(MO)); + break; + case MachineOperand::MO_Immediate: + if (expType == BRIG_TYPE_B1) { + m_opndList.push_back( + brigantine.createImmed(MO.getImm() != 0 ? 
1 : 0, expType)); + } else { + m_opndList.push_back(brigantine.createImmed(MO.getImm(), expType)); + } + break; + case MachineOperand::MO_FPImmediate: { + const ConstantFP *CFP = MO.getFPImm(); + if (CFP->getType()->isFloatTy()) { + m_opndList.push_back(brigantine.createImmed( + HSAIL_ASM::f32_t::fromRawBits( + *CFP->getValueAPF().bitcastToAPInt().getRawData()), + expType)); + } else if (CFP->getType()->isDoubleTy()) { + m_opndList.push_back(brigantine.createImmed( + HSAIL_ASM::f64_t::fromRawBits( + *CFP->getValueAPF().bitcastToAPInt().getRawData()), + expType)); + } + break; + } + case MachineOperand::MO_MachineBasicBlock: { + std::string sLabel = MO.getMBB()->getSymbol()->getName(); + m_opndList.push_back(brigantine.createLabelRef(sLabel)); + break; + } + default: + llvm_unreachable("unhandled operand type"); + } +} + +void BRIGAsmPrinter::BrigEmitOperandLdStAddress(const MachineInstr *MI, + unsigned opNum, + unsigned Segment) { + assert(opNum + 2 < MI->getNumOperands()); + const MachineOperand &base = MI->getOperand(opNum), + ® = MI->getOperand(opNum + 1), + &offset_op = MI->getOperand(opNum + 2); + + // Get offset + assert(offset_op.isImm()); + int64_t offset = offset_op.getImm(); + + // Get [%name] + std::string base_name; + if (base.isGlobal()) { + SmallString<256> NameStr; + getHSAILMangledName(NameStr, base.getGlobal()); + + base_name = NameStr.str(); + } + // Special cases for spill and private stack + else if (base.isImm()) { + int64_t addr = base.getImm(); + assert(isInt<32>(addr)); + assert(MI->getOpcode() == HSAIL::LD_SAMP); + + BrigEmitOperandImage(MI, opNum); // Constant sampler. + return; + } + // Kernel or function argument + else if (base.isSymbol()) { + base_name = "%"; + base_name.append(base.getSymbolName()); + } else if (base.isMCSymbol()) { + base_name = base.getMCSymbol()->getName(); + } + + // Get [$reg] + HSAIL_ASM::SRef reg_name; + if (reg.isReg() && reg.getReg() != 0) { + reg_name = HSAIL_ASM::SRef(HSAILInstPrinter::getRegisterName(reg.getReg())); + } + + const DataLayout &DL = getDataLayout(); + bool Is32Bit = (DL.getPointerSize(Segment) == 4); + + // Emit operand. + m_opndList.push_back( + brigantine.createRef(base_name, reg_name, offset, Is32Bit)); +} + +void BRIGAsmPrinter::BrigEmitVecArgDeclaration(const MachineInstr *MI) { + const MachineOperand &Symbol = + *TII->getNamedOperand(*MI, HSAIL::OpName::symbol); + + unsigned BT = TII->getNamedModifierOperand(*MI, HSAIL::OpName::TypeLength); + int64_t NElts = TII->getNamedModifierOperand(*MI, HSAIL::OpName::size); + unsigned Align = TII->getNamedModifierOperand(*MI, HSAIL::OpName::alignment); + + SmallString<64> Name; + Name += '%'; + Name += Symbol.getSymbolName(); + + HSAIL_ASM::DirectiveVariable ArgDecl; + if (NElts != 0) { + ArgDecl = brigantine.addArrayVariable(makeSRef(Name), NElts, + BRIG_SEGMENT_ARG, BT); + } else { + ArgDecl = brigantine.addVariable(makeSRef(Name), BRIG_SEGMENT_ARG, BT); + } + + ArgDecl.align() = getBrigAlignment(Align); + ArgDecl.modifier().isDefinition() = true; + ArgDecl.allocation() = BRIG_ALLOCATION_AUTOMATIC; + ArgDecl.linkage() = BRIG_LINKAGE_ARG; + + return; +} + +void BRIGAsmPrinter::BrigEmitOperandImage(const MachineInstr *MI, + unsigned opNum) { + MachineOperand object = MI->getOperand(opNum); + unsigned idx = object.getImm(); + std::string sOp; + // Indices for image_t and sampler_t args are biased, so now we un-bias them. + // Note that the biased values rely on biasing performed by + // HSAILPropagateImageOperands and HSAILISelLowering::LowerFormalArguments. 
+ if (idx < IMAGE_ARG_BIAS) { + // This is the initialized sampler. + HSAILSamplerHandle *hSampler = + Subtarget->getImageHandles()->getSamplerHandle(idx); + assert(hSampler && "Invalid sampler handle"); + std::string samplerName = hSampler->getSym(); + assert(!samplerName.empty() && "Expected symbol here"); + sOp = "&" + samplerName; + } else { + // This is the image + std::string sym = + Subtarget->getImageHandles()->getImageSymbol(idx - IMAGE_ARG_BIAS); + assert(!sym.empty() && "Expected symbol here"); + sOp = "%" + sym; + } + + m_opndList.push_back(brigantine.createRef(sOp)); +} + +HSAIL_ASM::OperandRegister BRIGAsmPrinter::getBrigReg(MachineOperand s) { + assert(s.getType() == MachineOperand::MO_Register); + return brigantine.createOperandReg( + HSAIL_ASM::SRef(HSAILInstPrinter::getRegisterName(s.getReg()))); +} + +void BRIGAsmPrinter::BrigEmitVecOperand(const MachineInstr *MI, + unsigned opStart, unsigned numRegs, + HSAIL_ASM::Inst inst) { + assert(numRegs >= 2 && numRegs <= 4); + HSAIL_ASM::ItemList list; + for (unsigned i = opStart; i < opStart + numRegs; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + list.push_back(getBrigReg(MO)); + } else if (MO.isImm()) { + BrigType16_t const expType = HSAIL_ASM::getOperandType( + inst, m_opndList.size(), brigantine.getMachineModel(), + brigantine.getProfile()); + list.push_back(brigantine.createImmed(MO.getImm(), expType)); + } + } + m_opndList.push_back(brigantine.createOperandList(list)); +} + +void BRIGAsmPrinter::BrigEmitImageInst(const MachineInstr *MI, + HSAIL_ASM::InstImage inst) { + unsigned opCnt = 0; + + if (inst.geometry() == BRIG_GEOMETRY_2DDEPTH || + inst.geometry() == BRIG_GEOMETRY_2DADEPTH) { + BrigEmitOperand(MI, opCnt++, inst); + } else { + BrigEmitVecOperand(MI, opCnt, 4, inst); + opCnt += 4; + } + + switch (inst.opcode()) { + case BRIG_OPCODE_RDIMAGE: + BrigEmitOperand(MI, opCnt++, inst); + BrigEmitOperand(MI, opCnt++, inst); + break; + case BRIG_OPCODE_LDIMAGE: + case BRIG_OPCODE_STIMAGE: + BrigEmitOperand(MI, opCnt++, inst); + break; + default: + ; + } + + switch (inst.geometry()) { + case BRIG_GEOMETRY_1D: + case BRIG_GEOMETRY_1DB: + BrigEmitOperand(MI, opCnt++, inst); + break; + case BRIG_GEOMETRY_1DA: + case BRIG_GEOMETRY_2D: + case BRIG_GEOMETRY_2DDEPTH: + BrigEmitVecOperand(MI, opCnt, 2, inst); + opCnt += 2; + break; + case BRIG_GEOMETRY_2DA: + case BRIG_GEOMETRY_2DADEPTH: + case BRIG_GEOMETRY_3D: + BrigEmitVecOperand(MI, opCnt, 3, inst); + opCnt += 3; + break; + } +} + +HSAIL_ASM::InstBasic BRIGAsmPrinter::BrigEmitInstBasic(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstBasic inst = brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + inst.type() = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength)->getImm(); + + int DestIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest); + if (DestIdx != -1) + BrigEmitOperand(&MI, DestIdx, inst); + + int Src0Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src0); + if (Src0Idx != -1) + BrigEmitOperand(&MI, Src0Idx, inst); + + int Src1Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src1); + if (Src1Idx != -1) + BrigEmitOperand(&MI, Src1Idx, inst); + + int Src2Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src2); + if (Src2Idx != -1) + BrigEmitOperand(&MI, Src2Idx, inst); + + return inst; +} + +HSAIL_ASM::InstMod BRIGAsmPrinter::BrigEmitInstMod(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstMod inst = brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + inst.type() = 
TII->getNamedOperand(MI, HSAIL::OpName::TypeLength)->getImm(); + inst.modifier().ftz() = + TII->getNamedOperand(MI, HSAIL::OpName::ftz)->getImm(); + + inst.round() = TII->getNamedOperand(MI, HSAIL::OpName::round)->getImm(); + + BrigEmitOperand(&MI, HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest), + inst); + BrigEmitOperand(&MI, HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src0), + inst); + + int Src1Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src1); + if (Src1Idx != -1) + BrigEmitOperand(&MI, Src1Idx, inst); + + int Src2Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src2); + if (Src2Idx != -1) + BrigEmitOperand(&MI, Src2Idx, inst); + + return inst; +} + +HSAIL_ASM::InstCmp BRIGAsmPrinter::BrigEmitInstCmp(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstCmp inst = brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + inst.compare() = TII->getNamedOperand(MI, HSAIL::OpName::op)->getImm(); + + inst.modifier().ftz() = + TII->getNamedOperand(MI, HSAIL::OpName::ftz)->getImm(); + + inst.type() = + TII->getNamedOperand(MI, HSAIL::OpName::destTypedestLength)->getImm(); + inst.sourceType() = + TII->getNamedOperand(MI, HSAIL::OpName::sourceType)->getImm(); + + BrigEmitOperand(&MI, HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest), + inst); + BrigEmitOperand(&MI, HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src0), + inst); + BrigEmitOperand(&MI, HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src1), + inst); + return inst; +} + +HSAIL_ASM::InstCvt BRIGAsmPrinter::BrigEmitInstCvt(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstCvt inst = brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + inst.type() = + TII->getNamedOperand(MI, HSAIL::OpName::destTypedestLength)->getImm(); + inst.sourceType() = + TII->getNamedOperand(MI, HSAIL::OpName::sourceType)->getImm(); + + // XXX - sourceType, destTypedestLength - These names are awful + inst.modifier().ftz() = + TII->getNamedOperand(MI, HSAIL::OpName::ftz)->getImm(); + inst.round() = TII->getNamedOperand(MI, HSAIL::OpName::round)->getImm(); + + BrigEmitOperand(&MI, HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest), + inst); + BrigEmitOperand(&MI, HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src), + inst); + return inst; +} + +HSAIL_ASM::InstSourceType +BRIGAsmPrinter::BrigEmitInstSourceType(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstSourceType inst = + brigantine.addInst(BrigOpc); + + unsigned Opc = MI.getOpcode(); + + inst.type() = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength)->getImm(); + inst.sourceType() = + TII->getNamedOperand(MI, HSAIL::OpName::sourceType)->getImm(); + + BrigEmitOperand(&MI, HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest), + inst); + BrigEmitOperand(&MI, HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src0), + inst); + + int Src1Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src1); + if (Src1Idx != -1) + BrigEmitOperand(&MI, Src1Idx, inst); + + int Src2Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src2); + if (Src2Idx != -1) + BrigEmitOperand(&MI, Src2Idx, inst); + + int Src3Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src3); + if (Src3Idx != -1) + BrigEmitOperand(&MI, Src3Idx, inst); + + return inst; +} + +HSAIL_ASM::InstLane BRIGAsmPrinter::BrigEmitInstLane(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstLane inst = brigantine.addInst(BrigOpc); + + unsigned Opc = MI.getOpcode(); + + inst.type() = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength)->getImm(); + inst.sourceType() = + 
TII->getNamedOperand(MI, HSAIL::OpName::sourceType)->getImm(); + + inst.width() = TII->getNamedOperand(MI, HSAIL::OpName::width)->getImm(); + + int DestIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest); + if (DestIdx != -1) { + BrigEmitOperand(&MI, DestIdx, inst); + } else { + // FIXME: There appears to be a bug when trying to use a custom operand with + // multiple fields in the outs. + int Dest0Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest0); + BrigEmitVecOperand(&MI, Dest0Idx, 4, inst); + } + + int Src0Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src0); + if (Src0Idx != -1) + BrigEmitOperand(&MI, Src0Idx, inst); + + int Src1Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src1); + if (Src1Idx != -1) + BrigEmitOperand(&MI, Src1Idx, inst); + + int Src2Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src2); + if (Src2Idx != -1) + BrigEmitOperand(&MI, Src2Idx, inst); + + int Src3Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src3); + if (Src3Idx != -1) + BrigEmitOperand(&MI, Src3Idx, inst); + + return inst; +} + +HSAIL_ASM::InstBr BRIGAsmPrinter::BrigEmitInstBr(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstBr inst = brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + inst.type() = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength)->getImm(); + inst.width() = TII->getNamedOperand(MI, HSAIL::OpName::width)->getImm(); + + int DestIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest); + if (DestIdx != -1) + BrigEmitOperand(&MI, DestIdx, inst); + + int Src0Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src0); + if (Src0Idx != -1) + BrigEmitOperand(&MI, Src0Idx, inst); + + int Src1Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src1); + if (Src1Idx != -1) + BrigEmitOperand(&MI, Src1Idx, inst); + + return inst; +} + +HSAIL_ASM::InstSeg BRIGAsmPrinter::BrigEmitInstSeg(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstSeg inst = brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + inst.type() = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength)->getImm(); + + unsigned Segment = TII->getNamedOperand(MI, HSAIL::OpName::segment)->getImm(); + inst.segment() = getHSAILSegment(Segment); + + int DestIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest); + if (DestIdx != -1) + BrigEmitOperand(&MI, DestIdx, inst); + + return inst; +} + +HSAIL_ASM::InstSegCvt BRIGAsmPrinter::BrigEmitInstSegCvt(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstSegCvt inst = + brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + inst.type() = + TII->getNamedOperand(MI, HSAIL::OpName::destTypedestLength)->getImm(); + + inst.sourceType() = + TII->getNamedOperand(MI, HSAIL::OpName::sourceType)->getImm(); + + unsigned Segment = TII->getNamedOperand(MI, HSAIL::OpName::segment)->getImm(); + inst.segment() = getHSAILSegment(Segment); + + inst.modifier().isNoNull() = + TII->getNamedOperand(MI, HSAIL::OpName::nonull)->getImm(); + + int DestIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest); + BrigEmitOperand(&MI, DestIdx, inst); + + int Src0Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src0); + BrigEmitOperand(&MI, Src0Idx, inst); + + return inst; +} + +HSAIL_ASM::InstMemFence +BRIGAsmPrinter::BrigEmitInstMemFence(const MachineInstr &MI, unsigned BrigOpc) { + HSAIL_ASM::InstMemFence inst = + brigantine.addInst(BrigOpc, BRIG_TYPE_NONE); + + // FIXME: libHSAIL seems to not have been updated for change to remove + // separate segment scope modifiers. 
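+  // Until it is, the single scope modifier is applied to both the global and
+  // group segment scope fields below, and the image segment scope is left as
+  // none.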
+ inst.memoryOrder() = TII->getNamedModifierOperand(MI, HSAIL::OpName::order); + inst.globalSegmentMemoryScope() = + TII->getNamedModifierOperand(MI, HSAIL::OpName::scope); + inst.groupSegmentMemoryScope() = inst.globalSegmentMemoryScope(); + inst.imageSegmentMemoryScope() = BRIG_MEMORY_SCOPE_NONE; + + return inst; +} + +HSAIL_ASM::InstMem BRIGAsmPrinter::BrigEmitInstMem(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstMem inst = brigantine.addInst(BrigOpc); + + unsigned VecSize = 1; // FIXME: Stop special casing this. + switch (MI.getOpcode()) { + case HSAIL::LD_V2_S32: + case HSAIL::LD_V2_U32: + case HSAIL::LD_V2_F32: + case HSAIL::LD_V2_S64: + case HSAIL::LD_V2_U64: + case HSAIL::LD_V2_F64: + + case HSAIL::ST_V2_U32: + case HSAIL::ST_V2_F32: + case HSAIL::ST_V2_U64: + case HSAIL::ST_V2_F64: + + case HSAIL::RARG_LD_V2_S32: + case HSAIL::RARG_LD_V2_U32: + case HSAIL::RARG_LD_V2_F32: + case HSAIL::RARG_LD_V2_S64: + case HSAIL::RARG_LD_V2_U64: + case HSAIL::RARG_LD_V2_F64: + VecSize = 2; + break; + + case HSAIL::LD_V3_S32: + case HSAIL::LD_V3_U32: + case HSAIL::LD_V3_F32: + case HSAIL::LD_V3_S64: + case HSAIL::LD_V3_U64: + case HSAIL::LD_V3_F64: + + case HSAIL::ST_V3_U32: + case HSAIL::ST_V3_F32: + case HSAIL::ST_V3_U64: + case HSAIL::ST_V3_F64: + + case HSAIL::RARG_LD_V3_S32: + case HSAIL::RARG_LD_V3_U32: + case HSAIL::RARG_LD_V3_F32: + case HSAIL::RARG_LD_V3_S64: + case HSAIL::RARG_LD_V3_U64: + case HSAIL::RARG_LD_V3_F64: + VecSize = 3; + break; + + case HSAIL::LD_V4_S32: + case HSAIL::LD_V4_U32: + case HSAIL::LD_V4_F32: + case HSAIL::LD_V4_S64: + case HSAIL::LD_V4_U64: + case HSAIL::LD_V4_F64: + + case HSAIL::ST_V4_U32: + case HSAIL::ST_V4_F32: + case HSAIL::ST_V4_U64: + case HSAIL::ST_V4_F64: + + case HSAIL::RARG_LD_V4_S32: + case HSAIL::RARG_LD_V4_U32: + case HSAIL::RARG_LD_V4_F32: + case HSAIL::RARG_LD_V4_S64: + case HSAIL::RARG_LD_V4_U64: + case HSAIL::RARG_LD_V4_F64: + VecSize = 4; + break; + } + + unsigned Segment = TII->getNamedOperand(MI, HSAIL::OpName::segment)->getImm(); + unsigned Align = TII->getNamedOperand(MI, HSAIL::OpName::align)->getImm(); + + inst.segment() = getHSAILSegment(Segment); + inst.type() = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength)->getImm(); + inst.align() = getBrigAlignment(Align); + inst.equivClass() = 0; + + // FIXME: These operands should always be present. 
+ if (const MachineOperand *Mask = + TII->getNamedOperand(MI, HSAIL::OpName::mask)) + inst.modifier().isConst() = Mask->getImm() & BRIG_MEMORY_CONST; + + if (const MachineOperand *Width = + TII->getNamedOperand(MI, HSAIL::OpName::width)) + inst.width() = Width->getImm(); + else + inst.width() = BRIG_WIDTH_NONE; + + if (VecSize == 1) + BrigEmitOperand(&MI, 0, inst); + else + BrigEmitVecOperand(&MI, 0, VecSize, inst); + + BrigEmitOperandLdStAddress(&MI, VecSize, Segment); + + return inst; +} + +HSAIL_ASM::InstAtomic BRIGAsmPrinter::BrigEmitInstAtomic(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstAtomic inst = + brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + unsigned Segment = TII->getNamedOperand(MI, HSAIL::OpName::segment)->getImm(); + inst.segment() = getHSAILSegment(Segment); + inst.memoryOrder() = TII->getNamedModifierOperand(MI, HSAIL::OpName::order); + inst.memoryScope() = TII->getNamedModifierOperand(MI, HSAIL::OpName::scope); + inst.atomicOperation() = TII->getNamedModifierOperand(MI, HSAIL::OpName::op); + inst.equivClass() = TII->getNamedOperand(MI, HSAIL::OpName::equiv)->getImm(); + inst.type() = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength)->getImm(); + + int DestIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest); + if (DestIdx != -1) + BrigEmitOperand(&MI, DestIdx, inst); + + int AddressIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::address); + BrigEmitOperandLdStAddress(&MI, AddressIdx, Segment); + + int Src0Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src0); + if (Src0Idx != -1) + BrigEmitOperand(&MI, Src0Idx, inst); + + int Src1Idx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::src1); + if (Src1Idx != -1) + BrigEmitOperand(&MI, Src1Idx, inst); + + return inst; +} + +HSAIL_ASM::InstImage BRIGAsmPrinter::BrigEmitInstImage(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstImage inst = brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + inst.imageType() = + TII->getNamedOperand(MI, HSAIL::OpName::imageType)->getImm(); + inst.coordType() = + TII->getNamedOperand(MI, HSAIL::OpName::coordType)->getImm(); + inst.geometry() = TII->getNamedOperand(MI, HSAIL::OpName::geometry)->getImm(); + inst.equivClass() = TII->getNamedOperand(MI, HSAIL::OpName::equiv)->getImm(); + + inst.type() = TII->getNamedOperand(MI, HSAIL::OpName::destType)->getImm(); + + int DestRIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::destR); + if (DestRIdx != -1) { + int DestGIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::destG); + if (DestGIdx == -1) // 1 component. 
+ BrigEmitOperand(&MI, DestRIdx, inst); + else + BrigEmitVecOperand(&MI, DestRIdx, 4, inst); + } + + int ImageIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::image); + BrigEmitOperand(&MI, ImageIdx, inst); + + int SamplerIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::sampler); + BrigEmitOperand(&MI, SamplerIdx, inst); + + int CoordWidthIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::coordWidth); + BrigEmitOperand(&MI, CoordWidthIdx, inst); + + return inst; +} + +HSAIL_ASM::InstAddr BRIGAsmPrinter::BrigEmitInstAddr(const MachineInstr &MI, + unsigned BrigOpc) { + HSAIL_ASM::InstAddr inst = brigantine.addInst(BrigOpc); + unsigned Opc = MI.getOpcode(); + + inst.type() = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength)->getImm(); + + unsigned Segment = TII->getNamedOperand(MI, HSAIL::OpName::segment)->getImm(); + inst.segment() = getHSAILSegment(Segment); + + int DestIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::dest); + int AddressIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::address); + + BrigEmitOperand(&MI, DestIdx, inst); + BrigEmitOperandLdStAddress(&MI, AddressIdx, Segment); + + return inst; +} + +bool BRIGAsmPrinter::usesGCNAtomicCounter(void) { + // TODO_HSA: This introduces another pass over all the instrs in the + // kernel. Need to find a more efficient way to get this info. + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + switch (II->getOpcode()) { + default: + continue; + case HSAIL::GCN_ATOMIC_APPEND_U32: + case HSAIL::GCN_ATOMIC_CONSUME_U32: + return true; + } + } + } + return false; +} + +BrigAlignment8_t BRIGAsmPrinter::getBrigAlignment(unsigned AlignVal) { + // Round to the next power of 2. + unsigned Rounded = RoundUpToAlignment(AlignVal, NextPowerOf2(AlignVal - 1)); + + BrigAlignment8_t ret = HSAIL_ASM::num2align(Rounded); + assert(ret != BRIG_ALIGNMENT_LAST && "invalid alignment value"); + return ret; +} + +// Force static initialization. +extern "C" void LLVMInitializeBRIGAsmPrinter() { + RegisterAsmPrinter X(TheHSAIL_32Target); + RegisterAsmPrinter Y(TheHSAIL_64Target); +} Index: lib/Target/HSAIL/BRIGAsmPrinter/BRIGDwarfCompileUnit.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/BRIGAsmPrinter/BRIGDwarfCompileUnit.h @@ -0,0 +1,58 @@ +//===-- BRIGDwarfCompileUnit.h ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef BRIG_DWARF_COMPILE_UNIT_HEADER +#define BRIG_DWARF_COMPILE_UNIT_HEADER + +#include "../lib/CodeGen/AsmPrinter/DwarfCompileUnit.h" +#include "BRIGDwarfDebug.h" +#include "BRIGAsmPrinter.h" + +namespace llvm { + +class LLVM_LIBRARY_VISIBILITY BRIGDwarfCompileUnit : public CompileUnit { +protected: + BRIGAsmPrinter *m_brigAP; + BRIGDwarfDebug *m_brigDD; + + virtual void addGVLabelToBlock(DIEBlock *block, const DIGlobalVariable *GV); + virtual void addDebugLocOffset(const DbgVariable *Var, DIE *VariableDie, + unsigned int Offset); + +public: + BRIGDwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit CU, + BRIGAsmPrinter *A, BRIGDwarfDebug *DW); + virtual ~BRIGDwarfCompileUnit(); + + virtual void addVariableAddress(DbgVariable *&DV, DIE *Die, + MachineLocation Location); + + virtual void addString(DIE *Die, unsigned Attribute, const StringRef Str); + + virtual void addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label); + + virtual void addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo); + + virtual void addBRIGDirectiveOffset(DIE *Die, unsigned Attribute, + unsigned Form, uint64_t Integer); + + virtual void createGlobalVariableDIE(const MDNode *N); + + DIE *constructPGVariableDIE(DbgPGVariable *Var); + + static inline bool classof(const BRIGDwarfCompileUnit *BDCU) { return true; } + + static inline bool classof(const CompileUnit *CU) { return true; } + +}; // class BRIGDwarfCompileUnit +} // namespace llvm + +#endif // BRIG_DWARF_COMPILE_UNIT_HEADER Index: lib/Target/HSAIL/BRIGAsmPrinter/HSAILKernelManager.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/BRIGAsmPrinter/HSAILKernelManager.h @@ -0,0 +1,108 @@ +//===-- HSAILKernelManager.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file HSAILKernelManager.h +/// Class that handles the metadata/abi management for the +/// AsmPrinter. Handles the parsing and generation of the metadata +/// for each kernel and keeps track of its arguments. +// +//===----------------------------------------------------------------------===// + +#ifndef _HSAILKERNELMANAGER_H_ +#define _HSAILKERNELMANAGER_H_ +#include "HSAIL.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/IR/Function.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include +#include +#include + +namespace HSAIL_ASM { +class Brigantine; +} + +namespace llvm { +class HSAILSubtarget; +class HSAILMachineFunctionInfo; +class HSAILModuleInfo; +class HSAILTargetMachine; +class StructType; +class Value; +class TypeSymbolTable; +class MachineFunction; +class MachineInstr; +class ConstantFP; +class HSAILPrintfInfo; + +class HSAILKernelManager { +public: + typedef enum { RELEASE_ONLY, DEBUG_ONLY, ALWAYS } ErrorMsgEnum; + HSAILKernelManager(HSAILTargetMachine *TM); + virtual ~HSAILKernelManager(); + + /// Clear the state of the KernelManager putting it in its most initial state. 
+ void clear(); + void setMF(MachineFunction *MF); + + /// Process the specific kernel parsing out the parameter information for the + /// kernel. + void processArgMetadata(raw_ostream &ignored, uint32_t buf, bool kernel); + + /// Prints the header for the kernel which includes the groupsize declaration + /// and calculation of the local/group/global id's. + void printHeader(const std::string &name); + + void brigEmitMetaData(HSAIL_ASM::Brigantine &brig, uint32_t id, + bool isKernel = false); + + /// Set bool value on whether to consider the function a kernel or a normal + /// function. + void setKernel(bool kernel); + + /// Set the unique ID of the kernel/function. + void setID(uint32_t id); + + /// Set the name of the kernel/function. + void setName(const std::string &name); + + // Get the UAV id for the specific pointer value. + uint32_t getUAVID(const Value *value); + +private: + void updatePtrArg(llvm::Function::const_arg_iterator Ip, int counter, + bool isKernel, const Function *F, int pointerCount); + /// Name of the current kernel. + std::string mName; + uint32_t mUniqueID; + bool mIsKernel; + bool mWasKernel; + + /// Flag to specify if an image write has occured or not in order to not add a + /// compiler specific write if no other writes to memory occured. + bool mHasImageWrite; + bool mHasOutputInst; + + /// Map from const Value * to UAV ID. + std::map mValueIDMap; + + HSAILTargetMachine *mTM; + const HSAILSubtarget *mSTM; + /// This is the global offset of the printf string id's. + MachineFunction *mMF; + HSAILMachineFunctionInfo *mMFI; + HSAILModuleInfo *mAMI; +}; // class HSAILKernelManager + +} // llvm namespace +#endif // _HSAILKERNELMANAGER_H_ Index: lib/Target/HSAIL/BRIGAsmPrinter/HSAILKernelManager.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/BRIGAsmPrinter/HSAILKernelManager.cpp @@ -0,0 +1,690 @@ +//===-- HSAILKernelManager.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILKernelManager.h" +#include "AMDOpenCLKernenv.h" +#include "HSAILCompilerErrors.h" +#include "HSAILKernel.h" +#include "HSAILMachineFunctionInfo.h" +#include "HSAILModuleInfo.h" +#include "HSAILSubtarget.h" +#include "HSAILTargetMachine.h" +#include "HSAILUtilityFunctions.h" +#include "HSAILOpaqueTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MathExtras.h" + +#include "libHSAIL/HSAILBrigantine.h" +#include "libHSAIL/HSAILItems.h" + +using namespace llvm; +#define NUM_EXTRA_SLOTS_PER_IMAGE 1 + +// This header file is required for generating global variables for kernel +// argument info. 
+namespace clk { +typedef unsigned int uint; +typedef uint32_t cl_mem_fence_flags; +//#include +// kernel arg access qualifier and type qualifier +typedef enum clk_arg_qualifier_t { + Q_NONE = 0, + + // for image type only, access qualifier + Q_READ = 1, + Q_WRITE = 2, + + // for pointer type only + Q_CONST = 4, // pointee + Q_RESTRICT = 8, + Q_VOLATILE = 16, // pointee + Q_PIPE = 32 // pipe + +} clk_arg_qualifier_t; + +} // end of namespace clk + + +static const char *getTypeName(Type *ptr, const char *symTab, + HSAILMachineFunctionInfo *mfi, bool signedType) { + switch (ptr->getTypeID()) { + case Type::StructTyID: { + OpaqueType OT = GetOpaqueType(ptr); + + switch (OT) { + case NotOpaque: + return "struct"; + case Event: + return "event"; + case Sampler: + return "sampler"; + case I1D: + return "image1d"; + case I1DB: + return "image1d_buffer"; + case I1DA: + return "image1d_array"; + case I2D: + return "image2d"; + case I2DA: + return "image2d_array"; + case I3D: + return "image3d"; + case I2DDepth: + return "image2ddepth"; + case I2DADepth: + return "image2dadepth"; + case Sema: + return "semaphore"; + case C32: + return "counter32"; + case C64: + return "counter64"; + case ReserveId: + return "reserveId"; + case CLKEventT: + return "clk_event_t"; + case QueueT: + return "queue_t"; + case UnknownOpaque: + return "opaque"; + } + } + case Type::HalfTyID: + return "half"; + case Type::FloatTyID: + return "float"; + case Type::DoubleTyID: { + return "double"; + } + case Type::IntegerTyID: { + LLVMContext &ctx = ptr->getContext(); + if (ptr == Type::getInt8Ty(ctx)) { + return (signedType) ? "i8" : "u8"; + } else if (ptr == Type::getInt16Ty(ctx)) { + return (signedType) ? "i16" : "u16"; + } else if (ptr == Type::getInt32Ty(ctx)) { + return (signedType) ? "i32" : "u32"; + } else if (ptr == Type::getInt64Ty(ctx)) { + return (signedType) ? 
"i64" : "u64"; + } + break; + } + default: + break; + case Type::ArrayTyID: { + const ArrayType *AT = cast(ptr); + ptr = AT->getElementType(); + return getTypeName(ptr, symTab, mfi, signedType); + break; + } + case Type::VectorTyID: { + const VectorType *VT = cast(ptr); + ptr = VT->getElementType(); + return getTypeName(ptr, symTab, mfi, signedType); + break; + } + case Type::PointerTyID: { + const PointerType *PT = cast(ptr); + ptr = PT->getElementType(); + return getTypeName(ptr, symTab, mfi, signedType); + break; + } + case Type::FunctionTyID: { + const FunctionType *FT = cast(ptr); + ptr = FT->getReturnType(); + return getTypeName(ptr, symTab, mfi, signedType); + break; + } + } + ptr->dump(); + if (mfi) { +#if 0 + mfi->addErrorMsg(amd::CompilerErrorMessage[UNKNOWN_TYPE_NAME]); +#endif + } + return "unknown"; +} + + +void HSAILKernelManager::updatePtrArg(Function::const_arg_iterator Ip, + int counter, bool isKernel, + const Function *F, int pointerCount) { + assert(F && "Cannot pass a nullptr Pointer to F!"); + assert(Ip->getType()->isPointerTy() && + "Argument must be a pointer to be passed into this function!\n"); + std::string ptrArg("pointer:"); + const char *symTab = "NoSymTab"; + uint32_t ptrID = getUAVID(Ip); + PointerType *PT = cast(Ip->getType()); + uint32_t Align = 4; + const char *MemType = "uav"; + if (PT->getElementType()->isSized()) { + Align = + mTM->getDataLayout()->getTypeAllocSize( + PT->getElementType()); + if ((Align & (Align - 1))) + Align = NextPowerOf2(Align); + } + ptrArg += Ip->getName().str() + ":" + + getTypeName(PT, symTab, mMFI, mMFI->isSignedIntType(Ip)) + + ":1:1:" + itostr(counter * 16) + ":"; + switch (PT->getAddressSpace()) { + case HSAILAS::ADDRESS_NONE: + // O << "No Address space qualifier!"; + mMFI->addErrorMsg(hsa::CompilerErrorMessage[INTERNAL_ERROR]); + assert(1); + break; + case HSAILAS::GLOBAL_ADDRESS: + mMFI->uav_insert(ptrID); + break; + case HSAILAS::READONLY_ADDRESS: { + if (isKernel) { + const HSAILKernel *t = mAMI->getKernel(F->getName()); + if (mAMI->usesHWConstant(t, Ip->getName())) { + MemType = /*(isSI) ? "uc\0" :*/ "hc\0"; + ptrID = mAMI->getConstPtrCB(t, Ip->getName()); + } else { + MemType = "c\0"; + mMFI->uav_insert(ptrID); + } + } else { + MemType = "c\0"; + mMFI->uav_insert(ptrID); + } + break; + } + default: + case HSAILAS::PRIVATE_ADDRESS: + MemType = "hp\0"; + break; + case HSAILAS::REGION_ADDRESS: + mMFI->setUsesRegion(); + MemType = "hr\0"; + ptrID = 0; + break; + case HSAILAS::GROUP_ADDRESS: + mMFI->setUsesLocal(); + MemType = "hl\0"; + ptrID = 1; + break; + }; + ptrArg += std::string(MemType) + ":"; + ptrArg += itostr(ptrID) + ":"; + ptrArg += itostr(Align) + ":"; + const Value *ptr = Ip; + if (mMFI->read_ptr_count(ptr)) { + ptrArg += "RO"; + // FIXME: add write-only pointer detection. + //} else if (mMFI->write_ptr_count(ptr)) { + // ptrArg += "WO"; + } else { + ptrArg += "RW"; + } + + const Module *M = mMF->getMMI().getModule(); + bool isSPIR = HSAIL::isSPIRModule(*M); + if (isSPIR) { + if (pointerCount == 0) + ptrArg += ":0:0:0"; // skip the print_buffer pointer + // No need update the kernel info for block kernels (child kernel). + else if (!F->getName().startswith("__OpenCL___amd_blocks_func__")) { + int typeQual = + mAMI->getKernel(F->getName())->accessTypeQualifer[pointerCount - 1]; + ptrArg += (typeQual & clk::Q_VOLATILE) ? ":1" : ":0"; + ptrArg += (typeQual & clk::Q_RESTRICT) ? ":1" : ":0"; + ptrArg += (typeQual & clk::Q_PIPE) ? ":1" : ":0"; + } + } else { + ptrArg += (mMFI->isVolatilePointer(Ip)) ? 
":1" : ":0"; + ptrArg += (mMFI->isRestrictPointer(Ip)) ? ":1" : ":0"; + } + mMFI->addMetadata(ptrArg, true); +} + +HSAILKernelManager::HSAILKernelManager(HSAILTargetMachine *TM) { + mTM = TM; + mSTM = mTM->getSubtargetImpl(); + mMFI = nullptr; + mAMI = nullptr; + mMF = nullptr; + clear(); +} + +HSAILKernelManager::~HSAILKernelManager() { clear(); } + +void HSAILKernelManager::setMF(MachineFunction *MF) { + mMF = MF; + mMFI = MF->getInfo(); + mAMI = &(MF->getMMI().getObjFileInfo()); +} + +void HSAILKernelManager::clear() { + mUniqueID = 0; + mWasKernel = false; + mHasImageWrite = false; + mHasOutputInst = false; +} + +void HSAILKernelManager::processArgMetadata(raw_ostream &ignored, uint32_t buf, + bool isKernel) { + const Function *F = mMF->getFunction(); + const char *symTab = "NoSymTab"; + Function::const_arg_iterator Ip = F->arg_begin(); + Function::const_arg_iterator Ep = F->arg_end(); + int pointerCount = 0; + + if (F->hasStructRetAttr()) { + assert(Ip != Ep && "Invalid struct return fucntion!"); + mMFI->addErrorMsg(hsa::CompilerErrorMessage[INTERNAL_ERROR]); + ++Ip; + } + uint32_t mCBSize = 0; + uint32_t CounterNum = 0; + uint32_t SemaNum = 0; + uint32_t ROArg = 0; + uint32_t WOArg = 0; + uint32_t RWArg = 0; + uint32_t NumArg = 0; + uint32_t SamplerNum = 0; + + while (Ip != Ep) { + Type *cType = Ip->getType(); + if (cType->isIntOrIntVectorTy() || cType->isFPOrFPVectorTy()) { + std::string argMeta("value:"); + argMeta += + Ip->getName().str() + ":" + + getTypeName(cType, symTab, mMFI, mMFI->isSignedIntType(Ip)) + + ":"; + int bitsize = cType->getPrimitiveSizeInBits(); + int numEle = 1; + if (cType->getTypeID() == Type::VectorTyID) { + numEle = cast(cType)->getNumElements(); + } + argMeta += itostr(numEle) + ":1:" + itostr((int64_t)mCBSize << 4); + mMFI->addMetadata(argMeta, true); + + // FIXME: simplify + if ((bitsize / numEle) < 32) { + bitsize = numEle >> 2; + } else { + bitsize >>= 7; + } + if (!bitsize) { + bitsize = 1; + } + + mCBSize += bitsize; + } else if (const PointerType *PT = dyn_cast(cType)) { + Type *CT = PT->getElementType(); + const StructType *ST = dyn_cast(CT); + if (ST && ST->isOpaque()) { + OpaqueType OT = GetOpaqueType(ST); + if (IsImage(OT)) { + + std::string imageArg("image:"); + imageArg += Ip->getName().str() + ":"; + switch (OT) { + case I1D: + imageArg += "1D:"; + break; + case I1DA: + imageArg += "1DA:"; + break; + case I1DB: + imageArg += "1DB:"; + break; + case I2D: + imageArg += "2D:"; + break; + case I2DA: + imageArg += "2DA:"; + break; + case I3D: + imageArg += "3D:"; + break; + case I2DDepth: + imageArg += "2DDepth:"; + break; + case I2DADepth: + imageArg += "2DADepth:"; + break; + default: + llvm_unreachable("unknown image type"); + break; + } + if (isKernel) { + if (mAMI->isReadOnlyImage(mMF->getFunction()->getName(), + (ROArg + WOArg + RWArg))) { + imageArg += "RO:" + itostr(ROArg); + ++ROArg; + } else if (mAMI->isWriteOnlyImage(mMF->getFunction()->getName(), + (ROArg + WOArg + RWArg))) { + imageArg += "WO:" + itostr(WOArg); + ++WOArg; + } else if (mAMI->isReadWriteImage(mMF->getFunction()->getName(), + (ROArg + WOArg + RWArg))) { + imageArg += "RW:" + itostr(RWArg); + ++RWArg; + } + } + imageArg += ":1:" + itostr(mCBSize * 16); + mMFI->addMetadata(imageArg, true); + mMFI->addi32Literal(mCBSize); + mCBSize += NUM_EXTRA_SLOTS_PER_IMAGE + 1; + } else if (OT == C32 || OT == C64) { + std::string counterArg("counter:"); + counterArg += Ip->getName().str() + ":" + + itostr(OT == C32 ? 
32 : 64) + ":" + + itostr(CounterNum++) + ":1:" + itostr(mCBSize * 16); + mMFI->addMetadata(counterArg, true); + ++mCBSize; + } else if (OT == Sema) { + std::string semaArg("sema:"); + semaArg += Ip->getName().str() + ":" + itostr(SemaNum++) + ":1:" + + itostr(mCBSize * 16); + mMFI->addMetadata(semaArg, true); + ++mCBSize; + } else if (OT == Sampler) { + std::string samplerArg("sampler:"); + samplerArg += Ip->getName().str() + ":" + itostr(SamplerNum++) + + ":1:" + itostr(mCBSize * 16); + mMFI->addMetadata(samplerArg, true); + ++mCBSize; + } else if (OT == QueueT) { + std::string queueArg("queue:"); + PointerType *PT = cast(Ip->getType()); + const char *MemType = "uav"; + if (PT->getAddressSpace() == HSAILAS::PRIVATE_ADDRESS) { + MemType = "hp\0"; + } + queueArg += + Ip->getName().str() + ":" + + getTypeName(PT, symTab, mMFI, mMFI->isSignedIntType(Ip)) + + ":1:1:" + itostr(mCBSize * 16) + ":" + MemType; + mMFI->addMetadata(queueArg, true); + ++mCBSize; + } else { + updatePtrArg(Ip, mCBSize, isKernel, F, pointerCount++); + ++mCBSize; + } + } else if (CT->getTypeID() == Type::StructTyID && + Ip->hasByValAttr()) { // To distinguish pass-by-value from + // pass-by-ptr. + // When struct is passed-by-value, the pointer to the struct copy + // is passed to the kernel. Relevant RTI is generated here + // (value...struct). + // [Informative: RTI for pass-by-pointer case (pointer...struct) is + // generated + // in the next "else if" block.] + const DataLayout *dl = mTM->getDataLayout(); + const StructLayout *sl = dl->getStructLayout(dyn_cast(CT)); + int bytesize = sl->getSizeInBytes(); + int reservedsize = (bytesize + 15) & ~15; + int numSlots = reservedsize >> 4; + if (!numSlots) { + numSlots = 1; + } + std::string structArg("value:"); + structArg += Ip->getName().str() + ":struct:" + itostr(bytesize) + + ":1:" + itostr(mCBSize * 16); + mMFI->addMetadata(structArg, true); + mCBSize += numSlots; + } else if (CT->isIntOrIntVectorTy() || CT->isFPOrFPVectorTy() || + CT->getTypeID() == Type::ArrayTyID || + CT->getTypeID() == Type::PointerTyID || + PT->getAddressSpace() != HSAILAS::PRIVATE_ADDRESS) { + updatePtrArg(Ip, mCBSize, isKernel, F, pointerCount++); + ++mCBSize; + } else { + assert(0 && "Cannot process current pointer argument"); + mMFI->addErrorMsg(hsa::CompilerErrorMessage[INTERNAL_ERROR]); + } + } else { + assert(0 && "Cannot process current kernel argument"); + mMFI->addErrorMsg(hsa::CompilerErrorMessage[INTERNAL_ERROR]); + } + const Module *M = mMF->getMMI().getModule(); + bool isSPIR = HSAIL::isSPIRModule(*M); + bool isConstArg = false; + + StringRef FuncName = F->getName(); + // No need update the kernel info for block kernels (child kernel). 
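+    // For SPIR kernels, const-ness comes from the kernel's recorded type
+    // qualifiers (indexed relative to KE_NUM_ARGS); otherwise it comes from
+    // the machine function info query below.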
+ if (isSPIR && !FuncName.startswith("__OpenCL___amd_blocks_func_")) { + if (NumArg >= HSAIL::KE_NUM_ARGS) { + int typeQual = mAMI->getKernel(F->getName()) + ->accessTypeQualifer[NumArg - HSAIL::KE_NUM_ARGS]; + if ((typeQual & clk::Q_CONST)) + isConstArg = true; + } + } else if (mMFI->isConstantArgument(Ip)) { + isConstArg = true; + } + if (isConstArg) { + std::string constArg("constarg:"); + constArg += itostr(NumArg) + ":" + Ip->getName().str(); + mMFI->addMetadata(constArg, true); + } + ++NumArg; + ++Ip; + } +} + +void HSAILKernelManager::printHeader(const std::string &name) { + mName = name; + mAMI->getOrCreateFunctionID(name); +} + +/** + * + * HSAIL format for emitting runtime information: + * block "rti" + * blockstring ""; + * endblock; + * + * @param O + * @param id + * @param kernel + */ + +void HSAILKernelManager::setKernel(bool kernel) { + mIsKernel = kernel; + if (kernel) { + mWasKernel = mIsKernel; + } +} + +void HSAILKernelManager::setID(uint32_t id) { mUniqueID = id; } + +void HSAILKernelManager::setName(const std::string &name) { mName = name; } + +class RTI { + std::string m_str; + HSAIL_ASM::Brigantine &m_brig; + mutable raw_string_ostream m_os; + +public: + RTI(HSAIL_ASM::Brigantine &brig) : m_brig(brig), m_os(m_str) {} + + ~RTI() { + HSAIL_ASM::DirectivePragma pragma = + m_brig.append(); + HSAIL_ASM::ItemList opnds; + opnds.push_back(m_brig.createOperandString("AMD RTI")); + const std::string &str = m_os.str(); + opnds.push_back(m_brig.createOperandString(str)); + pragma.operands() = opnds; + } + + raw_string_ostream &os() const { return m_os; } +}; + +template const RTI &operator<<(const RTI &os, const T &s) { + os.os() << s; + return os; +} +const RTI &operator<<(const RTI &os, const char *s) { + os.os() << s; + return os; +} + +void HSAILKernelManager::brigEmitMetaData(HSAIL_ASM::Brigantine &brig, + uint32_t id, bool isKernel) { + + // Initialization block related to current function being processed + int kernelId = id; + if (isKernel) { + kernelId = mAMI->getOrCreateFunctionID(mName); + mMFI->addCalledFunc(id); + mUniqueID = kernelId; + mIsKernel = true; + } + + const HSAILKernel *kernel = mAMI->getKernel(mName); + + if (kernel && isKernel && kernel->sgv) { + if (kernel->sgv->mHasRWG) { + HSAIL_ASM::DirectiveControl dc = + brig.append(); + dc.control() = BRIG_CONTROL_REQUIREDWORKGROUPSIZE; + + HSAIL_ASM::ItemList opnds; + for (int i = 0; i < 3; ++i) { + opnds.push_back( + brig.createImmed(kernel->sgv->reqGroupSize[i], BRIG_TYPE_U32)); + } + dc.operands() = opnds; + } + } + + if (isKernel) { + std::string emptyStr(""); + std::string &refEmptyStr(emptyStr); + raw_string_ostream oss(refEmptyStr); + // function name + RTI(brig) << "ARGSTART:" << mName; + if (isKernel) { + // version + RTI(brig) << "version:" + << itostr(mSTM->supportMetadata30() ? HSAIL_MAJOR_VERSION : 2) + << ":" << itostr(HSAIL_MINOR_VERSION) + ":" + << itostr(mSTM->supportMetadata30() ? 
HSAIL_REVISION_NUMBER + : HSAIL_20_REVISION_NUMBER); + // device info + RTI(brig) << "device:" << mSTM->getDeviceName(); + } + RTI(brig) << "uniqueid:" << kernelId; + if (kernel) { + size_t hwlocal = ((kernel->curHWSize + 3) & (~0x3)); + size_t hwregion = ((kernel->curHWRSize + 3) & (~0x3)); + // private memory + RTI(brig) << "memory:" + << "hwprivate:" + << (((mMFI->getStackSize() + mMFI->getPrivateSize() + 15) & + (~0xF))); + // region memory + RTI(brig) << "memory:" + << "hwregion:" << hwregion; + // local memory + RTI(brig) << "memory:" + << "hwlocal:" << hwlocal + mMFI->getGroupSize(); + if (kernel && isKernel && kernel->sgv) { + if (kernel->sgv->mHasRWG) { + RTI(brig) << "cws:" << kernel->sgv->reqGroupSize[0] << ":" + << kernel->sgv->reqGroupSize[1] << ":" + << kernel->sgv->reqGroupSize[2]; + } + if (kernel->sgv->mHasRWR) { + RTI(brig) << "crs:" << kernel->sgv->reqRegionSize[0] << ":" + << kernel->sgv->reqRegionSize[1] << ":" + << kernel->sgv->reqRegionSize[2]; + } + } + } + if (isKernel) { + for (std::vector::iterator ib = mMFI->kernel_md_begin(), + ie = mMFI->kernel_md_end(); + ib != ie; ++ib) { + std::string md = *ib; + if (md.find("argmap") == std::string::npos) { + RTI(brig) << (*ib); + } + } + } + + for (std::set::iterator ib = mMFI->func_md_begin(), + ie = mMFI->func_md_end(); + ib != ie; ++ib) { + RTI(brig) << (*ib); + } + + if (!mMFI->func_empty()) { + oss.str().clear(); + oss << "function:" << mMFI->func_size(); + + for (unsigned FID : mMFI->funcs()) + oss << ':' << FID; + + RTI(brig) << oss.str(); + } + + if (isKernel) { + for (StringMap::iterator smb = mMFI->sampler_begin(), + sme = mMFI->sampler_end(); + smb != sme; ++smb) { + RTI(brig) << "sampler:" << (*smb).second.name << ":" + << (*smb).second.idx << ":" + << ((*smb).second.val == (uint32_t)-1 ? 0 : 1) << ":" + << ((*smb).second.val != (uint32_t)-1 ? (*smb).second.val + : 0); + } + } + if (mSTM->isLargeModel()) { + RTI(brig) << "memory:64bitABI"; + } + + if (isKernel) { + RTI(brig) << "privateid:" << DEFAULT_SCRATCH_ID; + } + // Metadata for the device enqueue. + if (kernel && isKernel) { + RTI(brig) << "enqueue_kernel:" << kernel->EnqueuesKernel; + RTI(brig) << "kernel_index:" << kernel->KernelIndex; + } + + if (kernel) { + for (unsigned I = 0, E = kernel->ArgTypeNames.size(); I != E; ++I) { + RTI(brig) << "reflection:" << I << ":" << kernel->ArgTypeNames[I]; + } + } + + RTI(brig) << "ARGEND:" << mName; + } + + // De-initialization block + if (isKernel) { + mIsKernel = false; + mMFI->eraseCalledFunc(id); + mUniqueID = id; + } +} + +uint32_t HSAILKernelManager::getUAVID(const Value *value) { + if (mValueIDMap.find(value) != mValueIDMap.end()) { + return mValueIDMap[value]; + } + + return DEFAULT_RAW_UAV_ID; +} Index: lib/Target/HSAIL/BRIGAsmPrinter/LibHSAILAdapters.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/BRIGAsmPrinter/LibHSAILAdapters.h @@ -0,0 +1,33 @@ +//===- LibHSAIL.h - Convert HSAIL LLVM code to assembly ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_HSAIL_BRIGASMPRINTER_LIBHSAILADAPTERS_H +#define LLVM_LIB_TARGET_HSAIL_BRIGASMPRINTER_LIBHSAILADAPTERS_H + +#include "libHSAIL/HSAILBrigObjectFile.h" + +class RawOstreamWriteAdapter : public HSAIL_ASM::WriteAdapter { + llvm::raw_ostream &os; + +public: + RawOstreamWriteAdapter(llvm::raw_ostream &os_, std::ostream &errs_) + : IOAdapter(errs_), WriteAdapter(errs_), os(os_){}; + + int write(const char *data, size_t numBytes) const override { + os.write(data, numBytes); + return 0; + } + + Position getPos() const override { return os.tell(); } + + void setPos(Position) override { llvm_unreachable("Unimplemented"); } + + ~RawOstreamWriteAdapter() { os.flush(); } +}; + +#endif Index: lib/Target/HSAIL/CMakeLists.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/CMakeLists.txt @@ -0,0 +1,110 @@ + +include(CMakeDependentOption) + +set(LLVM_TARGET_DEFINITIONS HSAIL.td) + +tablegen(LLVM HSAILGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM HSAILGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM HSAILGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM HSAILGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM HSAILGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM HSAILGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM HSAILGenCallingConv.inc -gen-callingconv) +tablegen(LLVM HSAILGenIntrinsics.inc -gen-tgt-intrinsic) + +add_public_tablegen_target(HSAILCommonTableGen) + +find_path(LIBHSAIL_INCLUDE_DIR + NAMES + libHSAIL/Brig.h) + +find_library(LIBHSAIL_LIBRARIES hsail) + + +add_subdirectory(HSAILUtil) + +cmake_dependent_option(HSAIL_USE_LIBHSAIL + "Use libHSAIL for code emission of HSAIL" ON + "LIBHSAIL_LIBRARIES" OFF) + +set(sources + HSAILAsmPrinter.cpp + HSAILAlwaysInlinePass.cpp +# BRIGDwarfCompileUnit.cpp +# BRIGDwarfDebug.cpp + HSAILELFTargetObjectFile.cpp + HSAILFrameLowering.cpp + HSAILInstrInfo.cpp + HSAILIntrinsicInfo.cpp + HSAILISelDAGToDAG.cpp + HSAILISelLowering.cpp + HSAILMachineFunctionInfo.cpp + HSAILMCInstLower.cpp + HSAILModuleInfo.cpp + HSAILParamManager.cpp + HSAILRegisterInfo.cpp + HSAILSection.cpp + HSAILStoreInitializer.cpp + HSAILSubtarget.cpp + HSAILTargetMachine.cpp + HSAILUtilityFunctions.cpp + HSAILOpaqueTypes.cpp + ) + +if(HSAIL_USE_LIBHSAIL) + include_directories(${LIBHSAIL_INCLUDE_DIR}) + add_definitions("-DHSAIL_USE_LIBHSAIL") + + # We keep parts with a dependency on libHSAIL in a subdirectory + # because the LLVM build checks for and errors on "unknown" source + # files if we don't want to build them. + list(APPEND sources + BRIGAsmPrinter/BRIGAsmPrinter.h + BRIGAsmPrinter/BRIGAsmPrinter.cpp + BRIGAsmPrinter/BRIGDwarfCompileUnit.h + BRIGAsmPrinter/HSAILKernelManager.cpp + BRIGAsmPrinter/HSAILKernelManager.h + BRIGAsmPrinter/LibHSAILAdapters.h) +endif() + + +if( CMAKE_CL_64 ) + # A workaround for a bug in cmake 2.8.3. See PR 8885. + if( CMAKE_VERSION STREQUAL "2.8.3" ) + include(CMakeDetermineCompilerId) + endif() + # end of workaround. 
+# enable_language(ASM_MASM) +# ADD_CUSTOM_COMMAND( +# OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/HSAILCompilationCallback_Win64.obj +# MAIN_DEPENDENCY HSAILCompilationCallback_Win64.asm +# COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/HSAILCompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/HSAILCompilationCallback_Win64.asm +# ) +# set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/HSAILCompilationCallback_Win64.obj) +endif() + +add_llvm_target(HSAILCodeGen ${sources}) + +target_link_libraries(LLVMHSAILCodeGen PRIVATE LLVMHSAILUtil) + +if(HSAIL_USE_LIBHSAIL) + target_link_libraries(LLVMHSAILCodeGen PRIVATE ${LIBHSAIL_LIBRARIES}) +endif() + + +# add_dependencies(LLVMHSAILCodeGen +# LLVMAnalysis +# LLVMAsmPrinter +# LLVMCodeGen +# LLVMCore +# LLVMMC +# LLVMSelectionDAG +# LLVMSupport +# LLVMTarget +# LLVMHSAILAsmPrinter +# ) +#add_subdirectory(AsmParser) +#add_subdirectory(Disassembler) +add_subdirectory(InstPrinter) +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) Index: lib/Target/HSAIL/HSAIL.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAIL.h @@ -0,0 +1,233 @@ +//===-- HSAIL.h - Top-level interface for HSAIL representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the x86 +// target library, as used by the LLVM JIT. +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_HSAIL_H +#define TARGET_HSAIL_H + +#include "llvm/Support/DataTypes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCInstrDesc.h" + +#include "MCTargetDesc/HSAILMCTargetDesc.h" + +#define HSAIL_MAJOR_VERSION 3 +#define HSAIL_MINOR_VERSION 1 +#define HSAIL_REVISION_NUMBER 104 +#define HSAIL_20_REVISION_NUMBER 88 +#define ARENA_SEGMENT_RESERVED_UAVS 12 +#define DEFAULT_ARENA_UAV_ID 8 +#define DEFAULT_RAW_UAV_ID 7 +#define GLOBAL_RETURN_RAW_UAV_ID 11 +#define HW_MAX_NUM_CB 8 +#define MAX_NUM_UNIQUE_UAVS 8 + +// The next two values can never be zero, as zero is the ID that is +// used to assert against. +#define DEFAULT_LDS_ID 1 +#define DEFAULT_GDS_ID 1 +#define DEFAULT_SCRATCH_ID 1 +#define DEFAULT_VEC_SLOTS 8 + +#define OCL_DEVICE_ALL 0xFFFFF + +const unsigned int RESERVED_FUNCS = 1024; + +namespace llvm { + +class FunctionPass; +class LoopPass; +class MCCodeEmitter; +class MCContext; +class MCObjectWriter; +class MCSubtargetInfo; +class MCContext; +class MCInstrInfo; +class MCRegisterInfo; +class MCStreamer; +class MachineCodeEmitter; +class Target; +class TargetAsmBackend; +class HSAILTargetMachine; +class formatted_raw_ostream; +class raw_ostream; +class ModulePass; + +ModulePass *createHSAILLowerSPIRSamplersPass(); + +/// +/// +FunctionPass *createHSAILControlDependencyAnalysis(); + +/// +/// +FunctionPass *createHSAILUniformOperations(const HSAILTargetMachine &TM); + +/// +/// +FunctionPass *createHSAILOptimizeMemoryOps(const HSAILTargetMachine &TM); + +/// +/// +FunctionPass *createHSAILPropagateImageOperandsPass(); + +/// +/// +ModulePass *createHSAILSyntaxCleanupPass(); + +/// Insert kernel index metadata for device enqueue. 
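+/// (The index recorded by this pass presumably feeds the "kernel_index"
+/// entry emitted with the runtime metadata.)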
+ModulePass *createHSAILInsertKernelIndexMetadataPass(); + +/// Optimize and lower AddrSpaceCast +FunctionPass *createHSAILAddrSpaceCastPass(); + +ModulePass *createHSAILProducePrintfMetadataPass(); +FunctionPass *createHSAILConsumePrintfMetadataPass(HSAILTargetMachine &TM); + +ModulePass *createHSAILNullPtrInsertionPass(); + +/// createHSAILEarlyCFGOpts - HSAIL specific control flow optimizations +LoopPass *createHSAILEarlyCFGOpts(); + +/// createHSAILISelDag - This pass converts a legalized DAG into a +/// HSAIL-specific DAG, ready for instruction scheduling. +FunctionPass *createHSAILISelDag(TargetMachine &TM); + +/// createGlobalBaseRegPass - This pass initializes a global base +/// register for PIC on x86-32. +FunctionPass *createGlobalBaseRegPass(); + +/// createHSAILFloatingPointStackifierPass - This function returns a pass which +/// converts floating point register references and pseudo instructions into +/// floating point stack references and physical instructions. +/// +FunctionPass *createHSAILFloatingPointStackifierPass(); + +/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain +/// crossings. +FunctionPass *createSSEDomainFixPass(); + +/// createHSAILEmitCodeToMemory - Returns a pass that converts a register +/// allocated function into raw machine code in a dynamically +/// allocated chunk of memory. +FunctionPass *createEmitHSAILCodeToMemory(); + +/// createHSAILMaxStackAlignmentHeuristicPass - This function returns a pass +/// which determines whether the frame pointer register should be +/// reserved in case dynamic stack alignment is later required. +/// +FunctionPass *createHSAILMaxStackAlignmentHeuristicPass(); + +FunctionPass *createHSAILFuncArgScopeEmitter(TargetMachine &tm, + CodeGenOpt::Level OL); + +ModulePass *createHSAILAlwaysInlinePass(); + +extern Target TheHSAIL_32Target, TheHSAIL_64Target; + +} // End llvm namespace + +namespace llvm { +namespace HSAILAS { + +enum AddressSpaces { + PRIVATE_ADDRESS = 0, + GLOBAL_ADDRESS = 1, + READONLY_ADDRESS = 2, + GROUP_ADDRESS = 3, + FLAT_ADDRESS = 4, + REGION_ADDRESS = 5, + SPILL_ADDRESS = 6, + KERNARG_ADDRESS = 7, + ARG_ADDRESS = 8, + ADDRESS_NONE = 9 +}; +} + +// Target flags from tablegen +// See HSAILInstFormats.td +namespace HSAILInstrFlags { +enum { + // Instruction kind. + InstAddr = 1 << 3, + InstAtomic = 1 << 4, + InstBasic = 1 << 5, + InstBr = 1 << 6, + InstCmp = 1 << 7, + InstCvt = 1 << 8, + InstImage = 1 << 9, + InstLane = 1 << 10, + InstMem = 1 << 11, + InstMemFence = 1 << 12, + InstMod = 1 << 13, + InstQueryImage = 1 << 14, + InstQuerySampler = 1 << 15, + InstQueue = 1 << 16, + InstSeg = 1 << 17, + InstSegCvt = 1 << 18, + InstSignal = 1 << 19, + InstSourceType = 1 << 20, + + // Others. + IS_CONV = 1 << 23, + IS_IMAGEINST = 1 << 24, + + // Default modifier attributes. Used for marking default values of a + // modifier for an instruction to skip printing it. 
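+  // Each attribute is a two-bit field: the Lo/Hi constants name the
+  // individual bits and the combined value is the extraction mask.
+  // (Illustrative, assumed usage: mask the instruction's TSFlags, shift the
+  // field down, e.g. by 27 for WidthAttr, and compare against the
+  // HSAILWidthAttrFlags values below.)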
+ RoundAttrLo = 1 << 25, // 2 bits + RoundAttrHi = 1 << 26, + RoundAttr = RoundAttrLo | RoundAttrHi, + + WidthAttrLo = 1 << 27, // 2 bits + WidthAttrHi = 1 << 28, + WidthAttr = WidthAttrLo | WidthAttrHi, + + HasDefaultSegment = 1 << 29, + + InstBrigOpcodeLo = UINT64_C(1) << 48, + InstBrigOpcode = UINT64_C(0xffff) << 48 +}; +} + +namespace HSAILWidthAttrFlags { +enum { + WidthAttrNone = 0, + WidthAttrAll = 1, + WidthAttrWaveSize = 2, + WidthAttrOne = 3 +}; +} + +// Enum for memory operand decoding +namespace HSAILADDRESS { +enum { BASE = 0, REG = 1, OFFSET = 2, ADDRESS_NUM_OPS }; +} + +// Target architectures to optimize for +enum OptimizeForTargetArch { + GENERIC, // No target specific flavor + SI // Optimize fot Southern Islands family +}; + +namespace HSAIL { +enum OperandType { + /// Operand with register or immediate. + OPERAND_REG_IMM = llvm::MCOI::OPERAND_FIRST_TARGET +}; +} +} + +#define IMAGE_ARG_BIAS (1 << 16) + +#endif Index: lib/Target/HSAIL/HSAIL.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAIL.td @@ -0,0 +1,59 @@ +//===----- HSAIL.td - Target definition file for HSAIL ----*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// HSAIL Subtarget features. +//===----------------------------------------------------------------------===// + +def FeatureImages : SubtargetFeature<"images", + "HasImages", + "true", + "Enable image precision operations">; + +def FeatureGCN : SubtargetFeature<"gcn", + "IsGCN", + "true", + "Enable AMD GCN extensions">; + +//===----------------------------------------------------------------------===// +// HSAIL processors supported. +//===----------------------------------------------------------------------===// + +class Proc Features> + : Processor; + +def : Proc<"generic", []>; +def : Proc<"kaveri", [FeatureImages, FeatureGCN]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "HSAILRegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "HSAILInstrInfo.td" + +def HSAILInstrInfo : InstrInfo; + +def BRIGAsmWriter : AsmWriter { + field bit isMCAsmWriter = 0; +} + +def HSAIL : Target { + let InstructionSet = HSAILInstrInfo; + + let AssemblyWriters = [BRIGAsmWriter]; +} + Index: lib/Target/HSAIL/HSAILAlwaysInlinePass.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILAlwaysInlinePass.cpp @@ -0,0 +1,69 @@ +//===-- HSAILAlwaysInlinePass.cpp - Promote Allocas ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass marks all internal functions as always_inline and creates +/// duplicates of all other functions a marks the duplicates as always_inline. +// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +namespace { + +class HSAILAlwaysInline : public ModulePass { + + static char ID; + +public: + HSAILAlwaysInline() : ModulePass(ID) {} + bool runOnModule(Module &M) override; + const char *getPassName() const override { + return "HSAIL Always Inline Pass"; + } +}; + +} // End anonymous namespace + +char HSAILAlwaysInline::ID = 0; + +bool HSAILAlwaysInline::runOnModule(Module &M) { + + std::vector FuncsToClone; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function &F = *I; + if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() && + !F.hasFnAttribute(Attribute::NoInline)) + FuncsToClone.push_back(&F); + } + + for (Function *F : FuncsToClone) { + ValueToValueMapTy VMap; + Function *NewFunc = CloneFunction(F, VMap, false); + NewFunc->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(NewFunc); + F->replaceAllUsesWith(NewFunc); + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function &F = *I; + if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) { + F.addFnAttr(Attribute::AlwaysInline); + } + } + return false; +} + +ModulePass *llvm::createHSAILAlwaysInlinePass() { + return new HSAILAlwaysInline(); +} Index: lib/Target/HSAIL/HSAILArithmetic.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILArithmetic.td @@ -0,0 +1,383 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +defm ADD : InstMod_2Op_IntTypes<"add", BrigOpcode.ADD>; +defm ADD : InstMod_2Op_FPTypes<"add", BrigOpcode.ADD>; + +defm SUB : InstMod_2Op_IntTypes<"sub", BrigOpcode.SUB>; +defm SUB : InstMod_2Op_FPTypes<"sub", BrigOpcode.SUB>; + +defm MUL : InstMod_2Op_IntTypes<"mul", BrigOpcode.MUL>; +defm MUL : InstMod_2Op_FPTypes<"mul", BrigOpcode.MUL>; + +defm DIV : InstMod_2Op_IntTypes<"div", BrigOpcode.DIV>; +defm DIV : InstMod_2Op_FPTypes<"div", BrigOpcode.DIV>; + +defm REM : InstBasic_2Op_IntTypes<"rem", BrigOpcode.REM>; + +defm MULHI : InstBasic_2Op_IntTypes<"mulhi", BrigOpcode.MULHI>; + +defm ABS : InstMod_1Op_SignedIntTypes<"abs", BrigOpcode.ABS>; +defm ABS : InstMod_1Op_FPTypes<"abs", BrigOpcode.ABS>; + +defm : InstMod_2Op_IntTypes_Pat<"ADD", add>; +defm : InstMod_2Op_FPTypes_Pat<"ADD", fadd>; +defm : InstMod_2Op_IntTypes_Pat<"SUB", sub, 0>; +defm : InstMod_2Op_FPTypes_Pat<"SUB", fsub>; +defm : InstMod_2Op_IntTypes_Pat<"MUL", mul, 0>; +defm : InstMod_2Op_FPTypes_Pat<"MUL", fmul>; + +defm : InstMod_2Op_IntTypes_Pat<"DIV", udiv, 0>; +defm : InstMod_2Op_IntTypes_Pat<"DIV", sdiv, 1>; +defm : InstMod_2Op_FPTypes_Pat<"DIV", fdiv>; + +// Division without implicit ftz. 
+def : InstMod_2Op_Pat; + +defm : InstBasic_2Op_IntTypes_Pat<"REM", urem, 0>; +defm : InstBasic_2Op_IntTypes_Pat<"REM", srem, 1>; + +defm : InstBasic_2Op_IntTypes_Pat<"MULHI", mulhu, 0>; +defm : InstBasic_2Op_IntTypes_Pat<"MULHI", mulhs, 1>; + + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; + +defm MAD : InstBasic_3Op_SUF<"mad", BrigOpcode.MAD>; +defm : InstBasic_3Op_IntTypes_Pat<"MAD", HSAILumad, 0>; + +// u[32,64] forms illegal in HSAIL +defm NEG : InstBasic_1Op_SF<"neg", BrigOpcode.NEG>; + +let AddedComplexity = 2 in { +// Needs to match before sub pattern. +defm : InstBasic_1Op_IntTypes_Pat<"NEG", ineg, 1>; +} + +defm : InstBasic_1Op_FPTypes_Pat<"NEG", fneg>; + +defm COPYSIGN : InstMod_2Op_FPTypes<"copysign", BrigOpcode.COPYSIGN>; + +// FIXME: Need to support fcopysign with different second operand type. +def : InstMod_2Op_Pat; +def : InstMod_2Op_Pat; + + +defm FMA : InstMod_3Op_FPTypes<"fma", BrigOpcode.FMA>; +defm : InstMod_3Op_FPTypes_Pat<"FMA", fma>; + +defm NFMA : InstBasic_3Op_FPTypes<"nfma", BrigOpcode.NFMA>; +defm : InstBasic_3Op_FPTypes_Pat<"NFMA", HSAILnfma>; + +defm MAX : InstMod_2Op_SUF<"max", BrigOpcode.MAX>; +defm MIN : InstMod_2Op_SUF<"min", BrigOpcode.MIN>; + +defm : InstMod_2Op_FPTypes_Pat<"MAX", fmaxnum, BrigRound.NONE>; +defm : InstMod_2Op_FPTypes_Pat<"MIN", fminnum, BrigRound.NONE>; + +defm : InstMod_2Op_IntTypes_Pat<"MAX", HSAILumax, 0>; +defm : InstMod_2Op_IntTypes_Pat<"MAX", HSAILsmax, 1>; + +defm : InstMod_2Op_IntTypes_Pat<"MIN", HSAILumin, 0>; +defm : InstMod_2Op_IntTypes_Pat<"MIN", HSAILsmin, 1>; + +defm GCN_MAX : InstBasic_2Op_FPTypes<"gcn_max", BrigOpcode.GCNMAX>; +defm GCN_MIN : InstBasic_2Op_FPTypes<"gcn_min", BrigOpcode.GCNMIN>; + +// FIXME: Remove these intrinsics +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + + +defm MUL24 : InstBasic_2Op_IntTypes<"mul24", BrigOpcode.MUL24>; +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + + +defm MAD24 : InstBasic_3Op_IntTypes<"mad24", BrigOpcode.MAD24>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm SQRT : InstMod_1Op_FPTypes<"sqrt", BrigOpcode.SQRT>; +defm NSQRT : InstBasic_1Op_FPTypes<"nsqrt", BrigOpcode.NSQRT>; +defm NRSQRT : InstBasic_1Op_FPTypes<"nrsqrt", BrigOpcode.NRSQRT>; +defm NRCP : InstBasic_1Op_FPTypes<"nrcp", BrigOpcode.NRCP>; + +// FIXME: Having the ftz explicitly in this one is totally inconsistent +def : InstMod_1Op_Pat; + +def : InstMod_1Op_Pat; + +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; + +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; + +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; + +defm FRACT : InstMod_1Op_FPTypes<"fract", BrigOpcode.FRACT>; +defm : InstMod_1Op_FPTypes_Pat<"FRACT", HSAILfract, BrigRound.FLOAT_DEFAULT>; + +// Native Floating-Point Special Functions Operations +def NSIN_F32 : HSAILInstBasic_1Op<"nsin", BrigOpcode.NSIN, Inst_F32_F32>; +def NCOS_F32 : HSAILInstBasic_1Op<"ncos", BrigOpcode.NCOS, Inst_F32_F32>; +def NEXP2_F32 : HSAILInstBasic_1Op<"nexp2", BrigOpcode.NEXP2, Inst_F32_F32>; +def NLOG2_F32 : HSAILInstBasic_1Op<"nlog2", BrigOpcode.NLOG2, Inst_F32_F32>; + +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; + + +//////////////////////////////////////////////////////////////////////////////// +// Logical bit ops. 
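+// The 1-bit, 32-bit, and 64-bit bit-typed instruction forms are defined once;
+// the patterns below then map the LLVM and/or/xor nodes on the corresponding
+// integer types onto them.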
+ +defm AND : InstBasic_2Op_BitTypes<"and", BrigOpcode.AND>; +defm OR : InstBasic_2Op_BitTypes<"or", BrigOpcode.OR>; +defm XOR : InstBasic_2Op_BitTypes<"xor", BrigOpcode.XOR>; + +defm : InstBasic_2Op_BitIntTypes_Pat<"AND", and>; +defm : InstBasic_2Op_BitIntTypes_Pat<"OR", or>; +defm : InstBasic_2Op_BitIntTypes_Pat<"XOR", xor>; + +//////////////////////////////////////////////////////////////////////////////// +// not (integer, 1-bit, 32-bit, and 64-bit) +defm NOT : InstBasic_1Op_BitTypes<"not", BrigOpcode.NOT>; +defm : InstBasic_1Op_BitTypes_Pat<"NOT", not>; + +//////////////////////////////////////////////////////////////////////////////// +// cvt + +defm CVT : InstCvt_DestTypes<"cvt", BrigOpcode.CVT>; + +//////////////////////////////////////////////////////////////////////////////// +// popcount (integer, 1-bit, 32-bit, and 64-bit) + +// Patterns for integer intrinsics that match to InstSourceType. The +// LLVM intrinsics return an integer with equivalent width as the +// source, but the HSAIL instructions truncate to a 32-bit result. +multiclass InstSourceType_1Op_Pats { + def : Pat< + (i64 (node i64:$src0)), + (CVT_U64_U32 0, 0, BrigType.U64, BrigType.U32, + (i32 (!cast(inst#!if(bit_src, "_B64", "_U64")) i64:$src0, BrigType.U32, !if(bit_src, BrigType.B64, BrigType.U64)))) + >; + + def : Pat< + (i32 (trunc (node i64:$src0))), + (i32 (!cast(inst#!if(bit_src, "_B64", "_U64")) i64:$src0, BrigType.U32, !if(bit_src, BrigType.B64, BrigType.U64))) + >; + + def : Pat< + (i32 (node i32:$src0)), + (!cast(inst#!if(bit_src, "_B32", "_U32")) i32:$src0, BrigType.U32, !if(bit_src, BrigType.B32, BrigType.U32)) + >; +} + +defm POPCOUNT : InstSourceType_1Op_U32_BitTypes<"popcount", BrigOpcode.POPCOUNT>; + +defm : InstSourceType_1Op_Pats<"POPCOUNT_U32", ctpop, 1>; + +//////////////////////////////////////////////////////////////////////////////// +// firstbit (integer, 1-bit, 32-bit, and 64-bit) + +defm FIRSTBIT : InstSourceType_1Op_U32_IntTypes<"firstbit", BrigOpcode.FIRSTBIT>; +defm : InstSourceType_1Op_Pats<"FIRSTBIT_U32", ctlz>; +def : InstSourceType_1Op_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// lastbit (integer, 1-bit, 32-bit, and 64-bit) +defm LASTBIT : InstSourceType_1Op_U32_IntTypes<"lastbit", BrigOpcode.LASTBIT>; + +defm : InstSourceType_1Op_Pats<"LASTBIT_U32", cttz>; +def : InstSourceType_1Op_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// signbit (integer, 1-bit, 32-bit, and 64-bit) + +//////////////////////////////////////////////////////////////////////////////// +// shl (integer, signed and unsigned, 32-bit and 64-bit) +// shr (integer, signed and unsigned, 32-bit and 64-bit) +defm SHL : InstBasic_2Op_ShiftTypes<"shl", BrigOpcode.SHL>; +defm SHR : InstBasic_2Op_ShiftTypes<"shr", BrigOpcode.SHR>; + +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// cmov (1-bit, 32-bit, and 64-bit; integer and float) + +defm CMOV : InstBasic_3Op_CMov<"cmov", BrigOpcode.CMOV>; + +// Reduce complexity to prefer selecting various min / max patterns on +// select. 
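+// A negative AddedComplexity makes these generic select-to-cmov patterns lose
+// to the more specific patterns (such as the min/max forms) when both match
+// during instruction selection.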
+let AddedComplexity = -10 in { + def : InstBasic_CMov_Pat; + def : InstBasic_CMov_Pat; + def : InstBasic_CMov_Pat; +} + +//////////////////////////////////////////////////////////////////////////////// +// bitrev (integer, 32-bit, and 64-bit) + +//////////////////////////////////////////////////////////////////////////////// +// extract (integer, 32-bit, and 64-bit) + +//////////////////////////////////////////////////////////////////////////////// +// insert (integer, 32-bit, and 64-bit) + +//////////////////////////////////////////////////////////////////////////////// +// mov + +let isAsCheapAsAMove = 1, isReMaterializable = 1 in { + // FIXME: Spec calls TypeLength moveType for some reason, but that + // would be painful to deal with. + defm MOV : InstBasic_1Op_BF<"mov", BrigOpcode.MOV>; +} + + +// FIXME: Omitting 'node' in the output pattern results in invalid +// output with a mov using and defining the same register. +class MovImmPat : Pat < + (vt node:$src0), + (movInst node:$src0, bt) +>; + +class BitconvertMovPat : Pat < + (destvt (bitconvert srcvt:$src0)), + (movInst $src0, bt) +>; + + +def : MovImmPat; +def : MovImmPat; +def : MovImmPat; +def : MovImmPat; +def : MovImmPat; + +def : BitconvertMovPat; +def : BitconvertMovPat; +def : BitconvertMovPat; +def : BitconvertMovPat; + + +//////////////////////////////////////////////////////////////////////////////// +// misc operations + +defm BITSELECT : InstBasic_3Op_BitTypes<"bitselect", BrigOpcode.BITSELECT>; +defm : InstBasic_3Op_BitTypes_Pat<"BITSELECT", HSAILbitselect>; + +defm PACK : InstSourceType_3Op_Pack_Types<"pack", BrigOpcode.PACK>; + +// Media - packcvt +def PACKCVT_U8X4_F32 : HSAILInstSourceType_4Op<"packcvt", BrigOpcode.PACKCVT, Inst_U8X4_F32_F32_F32_F32>; + +def : InstSourceType_4Op_Pat; + +// Media - unpackcvt + +// TODO: src1 must be immediate. 
+def UNPACKCVT_F32_U8X4 : HSAILInstSourceType_2Op<"unpackcvt", BrigOpcode.UNPACKCVT, Inst_F32_U8X4_U32>; +def : InstSourceType_2Op_Pat; + +defm BITALIGN : InstBasic_3Op_BitTypes<"bitalign", BrigOpcode.BITALIGN>; +defm BYTEALIGN : InstBasic_3Op_BitTypes<"bytealign", BrigOpcode.BYTEALIGN>; + +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +def LERP_U8X4 : HSAILInstBasic_3Op<"lerp", BrigOpcode.LERP, Inst_U8X4_U8X4_U8X4_U8X4>; +def : InstBasic_3Op_Pat; + +defm SAD : InstSourceType_3Op_Sad_Types<"sad", BrigOpcode.SAD>; +def : InstSourceType_3Op_Pat; + +def SADHI_U16X2_U8X4 : HSAILInstSourceType_3Op<"sadhi", BrigOpcode.SADHI, Inst_U16X2_U8X4_U8X4_U16X2>; +def : InstSourceType_3Op_Pat; + +// media_ops2 + +def GCN_MSAD_B32 : HSAILInstBasic_3Op<"gcn_msad", BrigOpcode.GCNMSAD, Inst_B32_B32_B32_B32>; +def : InstBasic_3Op_Pat; + +def GCN_SADW_B32 : HSAILInstBasic_3Op<"gcn_sadw", BrigOpcode.GCNSADW, Inst_B32_B32_B32_B32>; +def : InstBasic_3Op_Pat; + +def GCN_SADD_B32 : HSAILInstBasic_3Op<"gcn_sadd", BrigOpcode.GCNSADD, Inst_B32_B32_B32_B32>; +def : InstBasic_3Op_Pat; + +defm GCN_MIN3 : InstBasic_3Op_SUF<"gcn_min3", BrigOpcode.GCNMIN3>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm GCN_MAX3 : InstBasic_3Op_SUF<"gcn_max3", BrigOpcode.GCNMAX3>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm GCN_MED3 : InstBasic_3Op_SUF<"gcn_med3", BrigOpcode.GCNMED3>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm BITEXTRACT : InstBasic_3Op_BitExtract<"bitextract", BrigOpcode.BITEXTRACT>; +defm : InstBasic_3Op_BitExtract_IntTypes_Pat<"BITEXTRACT", HSAILubitextract, 0>; +defm : InstBasic_3Op_BitExtract_IntTypes_Pat<"BITEXTRACT", HSAILsbitextract, 1>; + + +def GCN_BFM_B32 : HSAILInstBasic_2Op<"gcn_bfm", BrigOpcode.GCNBFM, Inst_B32_B32_B32>; +def : InstBasic_2Op_Pat; + +def GCN_QSAD_B64 : HSAILInstBasic_3Op<"gcn_qsad", BrigOpcode.GCNQSAD, Inst_B64_B64_B64_B64>; +def : InstBasic_3Op_Pat; + +def GCN_MQSAD : HSAILInstBasic_3Op<"gcn_mqsad", BrigOpcode.GCNMQSAD, Inst_B64_B64_B32_B64>; + +def : Pat< + (int_HSAIL_mqsad (i64 (GPROrImm i64:$src0)), i32:$src1, i64:$src2), + (GCN_MQSAD $src0, $src1, $src2, BrigType.B64) +>; + + +defm GCN_FLDEXP : InstBasic_2Op_LdExp<"gcn_fldexp", BrigOpcode.GCNFLDEXP>; +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; Index: lib/Target/HSAIL/HSAILAsmPrinter.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILAsmPrinter.h @@ -0,0 +1,89 @@ +//===-- HSAILAsmPrinter.h - Print HSAIL assembly code -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief HSAIL Assembly printer class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILASMPRINTER_H +#define LLVM_LIB_TARGET_HSAIL_HSAILASMPRINTER_H + +#include "llvm/CodeGen/AsmPrinter.h" + +namespace llvm { + +class ConstantFP; +class MachineFrameInfo; + +template class SmallString; + +class HSAILAsmPrinter : public AsmPrinter { +private: + typedef std::pair AddrInit; + + StringRef getArgTypeName(Type *Ty, bool Signed = false) const; + + void EmitFunctionArgument(unsigned ParamIndex, const Argument &Arg, + bool IsKernel, bool IsSExt, raw_ostream &O) const; + void EmitFunctionReturn(Type *Ty, StringRef Name, bool IsKernel, bool IsSExt, + raw_ostream &O) const; + void EmitFunctionLabel(const Function &F, raw_ostream &O, bool IsDecl) const; + + static char getSymbolPrefixForAddressSpace(unsigned AS); + char getSymbolPrefix(const MCSymbol &S) const; + + void printInitVarWithAddressPragma(StringRef VarName, uint64_t Offset, + const MCExpr *Expr, unsigned EltSize, + raw_ostream &O); + + void printFloat(uint32_t, raw_ostream &O); + void printDouble(uint64_t, raw_ostream &O); + void printConstantFP(const ConstantFP *CV, raw_ostream &O); + void printScalarConstant(const Constant *CV, SmallVectorImpl &Addrs, + uint64_t &TotalSizeEmitted, const DataLayout &DL, + raw_ostream &O); + + void printGVInitialValue(const GlobalValue &GV, const Constant *CV, + const DataLayout &DL, raw_ostream &O); + +public: + explicit HSAILAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer); + + bool doFinalization(Module &M) override; + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { return "HSAIL Assembly Printer"; } + + void getHSAILMangledName(SmallString<256> &Out, const GlobalValue *GV) const; + void EmitGlobalVariable(const GlobalVariable *GV) override; + void EmitStartOfAsmFile(Module &) override; + void EmitFunctionEntryLabel() override; + + void checkModuleSubtargetExtensions(const Module &M, + bool &IsFullProfile, + bool &IsGCN, + bool &HasImages) const; + static bool isHSAILInstrinsic(StringRef str); + + void computeStackUsage(const MachineFrameInfo *MFI, + uint64_t &PrivateSize, + unsigned &PrivateAlign, + uint64_t &SpillSize, + unsigned &SpillAlign) const; + + void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; + void EmitInstruction(const MachineInstr *MI) override; +}; + +} + +#endif Index: lib/Target/HSAIL/HSAILAsmPrinter.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILAsmPrinter.cpp @@ -0,0 +1,906 @@ +//===-- HSAILAsmPrinter.cpp - HSAIL Assembly printer ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// The HSAILAsmPrinter is used to print both assembly string and also binary +/// code. When passed an MCAsmStreamer it prints assembly and when passed +/// an MCObjectStreamer it outputs binary code. 
+// +//===----------------------------------------------------------------------===// +// + +#include "HSAILAsmPrinter.h" +#include "HSAIL.h" +#include "HSAILMCInstLower.h" +#include "HSAILUtilityFunctions.h" +#include "HSAILSubtarget.h" +#include "HSAILStoreInitializer.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +using namespace llvm; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "HSAILGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN + + +extern "C" void LLVMInitializeHSAILAsmPrinter() { + RegisterAsmPrinter Target32(TheHSAIL_32Target); + RegisterAsmPrinter Target64(TheHSAIL_64Target); +} + +HSAILAsmPrinter::HSAILAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)) {} + +bool HSAILAsmPrinter::doFinalization(Module &M) { + EmitEndOfAsmFile(M); + return false; +} + +bool HSAILAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + SetupMachineFunction(MF); + + // FIXME: Hack. Reset CurrentFnSym using the correctly mangled name. The + // generic code doesn't understand HSAIL's weird global symbol prefix rules, + // since LLVM doesn't have the concept of function scope globals that need a + // different prefix. + SmallString<256> Name; + getHSAILMangledName(Name, MF.getFunction()); + CurrentFnSym = OutContext.GetOrCreateSymbol(Name); + + +// EmitFunctionEntryLabel(); + OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); + EmitFunctionBody(); + + return false; +} + +static bool canInitAddressSpace(unsigned AS) { + return AS == HSAILAS::READONLY_ADDRESS || AS == HSAILAS::GLOBAL_ADDRESS; +} + +static StringRef getSegmentName(unsigned AS) { + switch (AS) { + case HSAILAS::GLOBAL_ADDRESS: + return "global"; + case HSAILAS::READONLY_ADDRESS: + return "readonly"; + case HSAILAS::GROUP_ADDRESS: + return "group"; + case HSAILAS::PRIVATE_ADDRESS: + return "private"; + default: + llvm_unreachable("unhandled segment"); + } +} + +static bool isProgramLinkage(const GlobalValue &GV) { + switch (GV.getLinkage()) { + case GlobalValue::ExternalLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::ExternalWeakLinkage: + case GlobalValue::AppendingLinkage: + return true; + + default: + return false; + } +} + +static bool isModuleLinkage(const GlobalValue &GV) { + return !isProgramLinkage(GV); +} + +void HSAILAsmPrinter::EmitFunctionArgument(unsigned ParamIndex, + const Argument &A, bool IsKernel, + bool IsSExt, raw_ostream &O) const { + const DataLayout &DL = getDataLayout(); + Type *Ty = A.getType(); + + unsigned NElts = ~0u; + Type *EltTy = HSAIL::analyzeType(Ty, NElts, DL); + + if (NElts > 1) { + unsigned ABIAlign = DL.getABITypeAlignment(Ty); + if (ABIAlign != DL.getABITypeAlignment(EltTy)) + O << "align(" << ABIAlign << ") "; + } + + // TODO_HSA: Need to emit alignment information. + O << (IsKernel ? "kernarg" : "arg") << '_' << getArgTypeName(EltTy, IsSExt) + << ' ' << '%'; + + if (MF) { + const HSAILParamManager &PM = + MF->getInfo()->getParamManager(); + + O << PM.getParamName(ParamIndex); + } else { + // If we don't have a machine function, we are just printing the + // declaration. 
The name doesn't matter so much. + + StringRef Name = A.getName(); + if (Name.empty()) + O << "arg_p" << ParamIndex; + else + O << Name; + } + + // For vector args, we'll use an HSAIL array. + if (NElts != 0) + O << '[' << NElts << ']'; +} + +void HSAILAsmPrinter::EmitFunctionReturn(Type *Ty, StringRef Name, + bool IsKernel, bool IsSExt, + raw_ostream &O) const { + const DataLayout &DL = getDataLayout(); + + unsigned NElts = ~0u; + Type *EltTy = HSAIL::analyzeType(Ty, NElts, DL); + + if (NElts > 1) { + unsigned ABIAlign = DL.getABITypeAlignment(Ty); + if (ABIAlign != DL.getABITypeAlignment(EltTy)) + O << "align(" << ABIAlign << ") "; + } + + O << (IsKernel ? "kernarg" : "arg") << '_' << getArgTypeName(EltTy, IsSExt) + << ' ' << '%' << Name; + if (NElts != 0) + O << '[' << NElts << ']'; +} + +void HSAILAsmPrinter::EmitFunctionLabel(const Function &F, raw_ostream &O, + bool IsDecl) const { + Type *RetTy = F.getReturnType(); + + // FIXME: Should define HSA calling conventions. + bool IsKernel = HSAIL::isKernelFunc(&F); + + SmallString<256> Name; + getHSAILMangledName(Name, &F); + + if (!MAI->isValidUnquotedName(Name)) + report_fatal_error("Unsupported symbol name"); + + O << (IsKernel ? "kernel " : "function ") << Name << '('; + + // Functions with kernel linkage cannot have output args. + if (!IsKernel) { + if (!RetTy->isVoidTy()) { + StringRef RetName("ret"); + SmallString<256> ReturnName; + if (!IsDecl) { + getNameWithPrefix(ReturnName, &F); + RetName = ReturnName; + } + + const auto &RetAttrs = F.getAttributes().getRetAttributes(); + + bool IsSExt = + RetAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt); + bool IsZExt = + RetAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + + if (IsSExt || IsZExt) { + EmitFunctionReturn(Type::getInt32Ty(RetTy->getContext()), RetName, + IsKernel, IsSExt, O); + } else + EmitFunctionReturn(RetTy, RetName, IsKernel, IsSExt, O); + } + + O << ")("; + } + + const auto &Attrs = F.getAttributes(); + + // Avoid ugly line breaks with small argument lists. + unsigned NArgs = F.arg_size(); + if (NArgs == 0) { + O << ')'; + } else if (NArgs == 1) { + bool IsSExt = Attrs.hasAttribute(1, Attribute::SExt); + EmitFunctionArgument(0, *F.arg_begin(), IsKernel, IsSExt, O); + O << ')'; + } else { + O << "\n\t"; + + // Loop through all of the parameters and emit the types and corresponding + // names. + unsigned Index = 0; + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++Index) { + bool IsSExt = Attrs.hasAttribute(Index + 1, Attribute::SExt); + EmitFunctionArgument(Index, *I++, IsKernel, IsSExt, O); + if (I != E) + O << ",\n\t"; + } + + O << ')'; + } +} + +// FIXME: Doesn't make sense to rely on address space for this. +char HSAILAsmPrinter::getSymbolPrefixForAddressSpace(unsigned AS) { + return (AS == HSAILAS::GROUP_ADDRESS || AS == HSAILAS::PRIVATE_ADDRESS) ? 
'%'
+                                                                            : '&';
+}
+
+// FIXME: Duplicated in BRIGAsmPrinter
+char HSAILAsmPrinter::getSymbolPrefix(const MCSymbol &Sym) const {
+  const GlobalVariable *GV = MMI->getModule()->getNamedGlobal(Sym.getName());
+  assert(GV && "Need prefix for undefined GlobalVariable");
+
+  unsigned AS = GV->getType()->getAddressSpace();
+  return getSymbolPrefixForAddressSpace(AS);
+}
+
+void HSAILAsmPrinter::printInitVarWithAddressPragma(StringRef VarName,
+                                                    uint64_t BaseOffset,
+                                                    const MCExpr *Expr,
+                                                    unsigned EltSize,
+                                                    raw_ostream &O) {
+  MCValue Val;
+  bool Res = Expr->EvaluateAsRelocatable(Val, nullptr, nullptr);
+  (void)Res;
+  assert(Res && "Could not evaluate MCExpr");
+  assert(!Val.getSymB() && "Multi-symbol expressions not handled");
+
+  const MCSymbol &Sym = Val.getSymA()->getSymbol();
+
+  O << "pragma \"initvarwithaddress:" << VarName << ':'
+    << BaseOffset // Offset into the destination.
+    << ':' << EltSize << ':' << getSymbolPrefix(Sym) << Sym.getName() << ':'
+    << Val.getConstant() // Offset of the symbol being written.
+    << '\"' << ';' << '\n';
+}
+
+// HSAIL single-precision hex constants use the 0F prefix.
+void HSAILAsmPrinter::printFloat(uint32_t Val, raw_ostream &O) {
+  O << format("0F%" PRIx32, Val);
+}
+
+// HSAIL double-precision hex constants use the 0D prefix, not 0F.
+void HSAILAsmPrinter::printDouble(uint64_t Val, raw_ostream &O) {
+  O << format("0D%" PRIx64, Val);
+}
+
+void HSAILAsmPrinter::printConstantFP(const ConstantFP *CFP, raw_ostream &O) {
+  uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+  if (CFP->getType()->isFloatTy())
+    printFloat(static_cast(Val), O);
+  else if (CFP->getType()->isDoubleTy())
+    printDouble(Val, O);
+  else
+    llvm_unreachable("unhandled ConstantFP");
+}
+
+void HSAILAsmPrinter::printScalarConstant(const Constant *CPV,
+                                          SmallVectorImpl &Addrs,
+                                          uint64_t &TotalSizeEmitted,
+                                          const DataLayout &DL,
+                                          raw_ostream &O) {
+  if (const ConstantInt *CI = dyn_cast(CPV)) {
+    TotalSizeEmitted += DL.getTypeAllocSize(CI->getType());
+    O << CI->getValue();
+    return;
+  }
+
+  if (const ConstantFP *CFP = dyn_cast(CPV)) {
+    TotalSizeEmitted += DL.getTypeAllocSize(CFP->getType());
+    printConstantFP(CFP, O);
+    return;
+  }
+
+  if (const ConstantDataSequential *CDS =
+          dyn_cast(CPV)) {
+    for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+      if (I > 0)
+        O << ", ";
+
+      const Constant *Elt = CDS->getElementAsConstant(I);
+      printScalarConstant(Elt, Addrs, TotalSizeEmitted, DL, O);
+    }
+
+    return;
+  }
+
+  if (isa(CPV)) {
+    TotalSizeEmitted += DL.getTypeAllocSize(CPV->getType());
+    O << '0';
+    return;
+  }
+
+  if (const GlobalValue *GV = dyn_cast(CPV)) {
+    O << '0';
+
+    auto ME = MCSymbolRefExpr::Create(getSymbol(GV), OutContext);
+    Addrs.push_back(std::make_pair(TotalSizeEmitted, ME));
+    TotalSizeEmitted += DL.getTypeAllocSize(GV->getType());
+    return;
+  }
+
+  if (const ConstantExpr *CExpr = dyn_cast(CPV)) {
+    const MCExpr *ME = lowerConstant(CPV);
+    O << '0';
+
+    Addrs.push_back(std::make_pair(TotalSizeEmitted, ME));
+    TotalSizeEmitted += DL.getTypeAllocSize(CExpr->getType());
+    return;
+  }
+
+  llvm_unreachable("unhandled scalar constant type");
+}
+
+void HSAILAsmPrinter::printGVInitialValue(const GlobalValue &GV,
+                                          const Constant *CV,
+                                          const DataLayout &DL,
+                                          raw_ostream &O) {
+  if (const ConstantInt *CI = dyn_cast(CV)) {
+    if (CI->getType()->isIntegerTy(1))
+      O << (CI->getZExtValue() ? 
'1' : '0') << ';'; + else + O << CI->getValue() << ';'; + return; + } + + if (const ConstantFP *CFP = dyn_cast(CV)) { + printConstantFP(CFP, O); + O << ';'; + return; + } + + unsigned NElts = 1; + Type *EltTy = HSAIL::analyzeType(CV->getType(), NElts, DL); + + unsigned EltSize = DL.getTypeAllocSize(EltTy); + SmallVector AddrInits; + + // Write other cases as byte array. + StoreInitializer store(EltTy, *this); + + store.append(CV, GV.getName()); + + // Make sure this is actually an array. For the special case of a single + // pointer initializer, we don't want the braces. + if (NElts != 0) + O << getArgTypeName(EltTy) << "[]("; + + store.print(O); + + if (NElts != 0) + O << ')'; + + O << ';'; + + if (!store.varInitAddresses().empty()) + O << '\n'; + + for (const auto &VarInit : store.varInitAddresses()) { + char Pre = getSymbolPrefixForAddressSpace(GV.getType()->getAddressSpace()); + SmallString<128> Name; + Name += Pre; + Name += GV.getName(); + + printInitVarWithAddressPragma(Name, VarInit.BaseOffset, VarInit.Expr, + EltSize, O); + } + + O << '\n'; +} + +void HSAILAsmPrinter::getHSAILMangledName(SmallString<256> &NameStr, + const GlobalValue *GV) const { + if (isa(GV)) { + NameStr += '&'; + } else if (const GlobalAlias *GA = dyn_cast(GV)) { + if (isa(GA->getAliasee())) + NameStr += '&'; + else + llvm_unreachable("Not handled"); + } else { + unsigned AS = GV->getType()->getAddressSpace(); + NameStr += getSymbolPrefixForAddressSpace(AS); + } + + SmallString<256> Mangled; + SmallString<256> Sanitized; + + getNameWithPrefix(Mangled, GV); + + NameStr += Mangled; + +#if 0 + // FIXME: We need a way to deal with invalid identifiers, e.g. leading + // period. We can replace them with something here, but need a way to resolve + // possible conflicts. + if (HSAIL::sanitizedGlobalValueName(Mangled, Sanitized)) + NameStr += Sanitized; + else + NameStr += Mangled; +#endif +} + +// FIXME: Mostly duplicated in BRIGAsmPrinter +static void printAlignTypeQualifier(const GlobalValue &GV, const DataLayout &DL, + Type *InitTy, Type *EmitTy, unsigned NElts, + bool IsLocal, raw_ostream &O) { + unsigned Alignment = GV.getAlignment(); + if (Alignment == 0) + Alignment = DL.getPrefTypeAlignment(InitTy); + else { + // If an alignment is specified, it must be equal to or greater than the + // variable's natural alignment. + unsigned NaturalAlign = IsLocal ? 
DL.getPrefTypeAlignment(EmitTy) + : DL.getABITypeAlignment(EmitTy); + + Alignment = std::max(Alignment, NaturalAlign); + } + + // Align arrays at least by 4 bytes + if (Alignment < 4 && NElts != 0) + Alignment = 4; + + if (Alignment != DL.getABITypeAlignment(EmitTy)) + O << "align(" << Alignment << ") "; +} + +void HSAILAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + if (HSAIL::isIgnoredGV(GV)) + return; + + SmallString<256> Name; + getHSAILMangledName(Name, GV); + + SmallString<1024> Str; + raw_svector_ostream O(Str); + const DataLayout &DL = getDataLayout(); + + PointerType *Ty = GV->getType(); + Type *InitTy = Ty->getElementType(); + unsigned AS = Ty->getAddressSpace(); + + if (isProgramLinkage(*GV)) + O << "prog "; + + if (AS != HSAILAS::READONLY_ADDRESS) + O << "alloc(agent) "; + + unsigned NElts = ~0u; + Type *EmitTy = HSAIL::analyzeType(InitTy, NElts, DL); + + printAlignTypeQualifier(*GV, DL, InitTy, EmitTy, NElts, false, O); + + O << getSegmentName(AS) << '_' << getArgTypeName(EmitTy) << ' ' << Name; + + if (NElts != 0) + O << '[' << NElts << ']'; + + // TODO_HSA: if group memory has initializer, then emit instructions to + // initialize dynamically. + if (GV->hasInitializer() && canInitAddressSpace(AS)) { + const Constant *Init = cast(GV->getInitializer()); + + if (isa(Init)) + O << ';'; + else { + O << " = "; + + // Emit trivial zero initializers as a single 0. + if (Init->isNullValue()) { + Type *Ty = Init->getType(); + if (Ty->isAggregateType() || Ty->isVectorTy()) { + O << getArgTypeName(EmitTy) << "[]("; + + // FIXME: Use uint64_t for NElts + for (unsigned I = 0; I < NElts; ++I) { + if (I > 0) + O << ", "; + O << '0'; + } + + O << ')'; + } else + O << '0'; + O << ';'; + } else { + printGVInitialValue(*GV, Init, DL, O); + } + } + } else { + O << ';'; + } + + OutStreamer->EmitRawText(O.str()); +} + +// Check if any defined functions use subtargets that require extensions. +void HSAILAsmPrinter::checkModuleSubtargetExtensions(const Module &M, + bool &IsFullProfile, + bool &IsGCN, + bool &HasImages) const { + IsFullProfile = false; + IsGCN = false; + HasImages = false; + + for (const Function &F : M) { + const HSAILSubtarget &ST = TM.getSubtarget(F); + + if (ST.isFullProfile()) + IsFullProfile = true; + + if (ST.isGCN()) + IsGCN = true; + + if (ST.hasImages()) + HasImages = true; + + // Stop looking if there are no more subtarget extensions to check for, + // which is the most common case. + if (IsFullProfile && IsGCN && HasImages) + break; + } +} + +bool HSAILAsmPrinter::isHSAILInstrinsic(StringRef str) { + if ((HSAILIntrinsic::ID)Intrinsic::not_intrinsic != + getIntrinsicForGCCBuiltin("HSAIL", str.data())) + return true; + return str.startswith(StringRef("llvm.HSAIL.")); +} + +void HSAILAsmPrinter::EmitStartOfAsmFile(Module &M) { + SmallString<32> Str; + raw_svector_ostream O(Str); + + Triple TT(TM.getTargetTriple()); + bool IsLargeModel = (TT.getArch() == Triple::hsail64); + + bool IsFullProfile, IsGCN, HasImages; + checkModuleSubtargetExtensions(M, IsFullProfile, IsGCN, HasImages); + + O << "module &__llvm_hsail_module:" << BRIG_VERSION_HSAIL_MAJOR << ':' + << BRIG_VERSION_HSAIL_MINOR << ':' + << (IsFullProfile ? "$full" : "$base") << ':' + << (IsLargeModel ? 
"$large" : "$small") << ':' + << "$near" // TODO: Get from somewhere + << ";\n\n"; + + if (IsGCN) + O << "extension \"amd:gcn\";\n"; + + if (HasImages) + O << "extension \"IMAGE\";\n"; + + OutStreamer->EmitRawText(O.str()); + + for (const GlobalVariable &GV : M.globals()) { + unsigned AS = GV.getType()->getAddressSpace(); + if (AS != HSAILAS::PRIVATE_ADDRESS && AS != HSAILAS::GROUP_ADDRESS) + EmitGlobalVariable(&GV); + } + + // Emit function declarations, except for kernels or intrinsics. + for (const Function &F : M) { + if (F.isIntrinsic()) + continue; + + if (F.isDeclaration() && isModuleLinkage(F)) + continue; + + if (!HSAIL::isKernelFunc(&F) && !isHSAILInstrinsic(F.getName())) { + Str.clear(); + O.resync(); + + O << "decl "; + + if (isProgramLinkage(F)) + O << "prog "; + + EmitFunctionLabel(F, O, true); + O << ";\n\n"; + OutStreamer->EmitRawText(O.str()); + } + } +} + +StringRef HSAILAsmPrinter::getArgTypeName(Type *Ty, bool Signed) const { + switch (Ty->getTypeID()) { + case Type::VoidTyID: + break; + case Type::FloatTyID: + return "f32"; + case Type::DoubleTyID: + return "f64"; + case Type::IntegerTyID: { + switch (Ty->getIntegerBitWidth()) { + case 32: + return Signed ? "s32" : "u32"; + case 64: + return Signed ? "s64" : "u64"; + case 1: + return "b1"; + case 8: + return Signed ? "s8" : "u8"; + case 16: + return Signed ? "s16" : "u16"; + default: + llvm_unreachable("unhandled integer width argument"); + } + } + case Type::PointerTyID: { + const PointerType *PT = cast(Ty); + const StructType *ST = dyn_cast(PT->getElementType()); + if (ST && ST->isOpaque()) { + StringRef Name = ST->getName(); + if (Name.startswith("struct._image1d_t") || + Name.startswith("struct._image1d_array_t") || + Name.startswith("struct._image1d_buffer_t") || + Name.startswith("struct._image2d_t") || + Name.startswith("struct._image2d_array_t") || + Name.startswith("struct._image3d_t")) { + return "_RWImg"; + } else if (Name.startswith("struct._sampler_t")) { + return "_Samp"; + } else if (Name == "struct._counter32_t" || Name == "struct._event_t") { + const DataLayout &DL = getDataLayout(); + return DL.getPointerSize(HSAILAS::GLOBAL_ADDRESS) == 4 ? "u64" : "u32"; + } else { + llvm_unreachable("unhandled struct type argument"); + } + } else { + unsigned AS = PT->getAddressSpace(); + return getDataLayout().getPointerSize(AS) == 4 ? "u32" : "u64"; + } + } + case Type::StructTyID: // Treat struct as array of bytes. + return "u8"; + + case Type::VectorTyID: + case Type::ArrayTyID: { + // Treat as array of elements. + const SequentialType *ST = cast(Ty); + + return getArgTypeName(ST->getElementType()); + } + default: + llvm_unreachable("unhandled argument type id"); + } + + return ""; +} + +void HSAILAsmPrinter::EmitFunctionEntryLabel() { + std::string FunStr; + raw_string_ostream O(FunStr); + + const Function *F = MF->getFunction(); + + if (isProgramLinkage(*F)) + O << "prog "; + EmitFunctionLabel(*F, O, false); + O << "\n{"; + + OutStreamer->EmitRawText(O.str()); +} + +void HSAILAsmPrinter::computeStackUsage(const MachineFrameInfo *MFI, + uint64_t &PrivateSize, + unsigned &PrivateAlign, + uint64_t &SpillSize, + unsigned &SpillAlign) const { + SpillSize = 0; + PrivateSize = 0; + PrivateAlign = 4; + SpillAlign = 4; + + // The stack objects have been preprocessed by + // processFunctionBeforeFrameFinalized so that we only expect the last two + // frame objects. 
+ for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd(); + I != E; ++I) { + if (MFI->isDeadObjectIndex(I)) + continue; + + if (MFI->isSpillSlotObjectIndex(I)) { + assert(SpillSize == 0 && "Only one spill object should be seen"); + + SpillSize = MFI->getObjectSize(I); + SpillAlign = MFI->getObjectAlignment(I); + } else { + assert(PrivateSize == 0 && "Only one private object should be seen"); + + PrivateSize = MFI->getObjectSize(I); + PrivateAlign = MFI->getObjectAlignment(I); + } + } +} + +void HSAILAsmPrinter::EmitFunctionBodyStart() { + std::string FunStr; + raw_string_ostream O(FunStr); + + const DataLayout &DL = getDataLayout(); + +#if 0 + if (isKernelFunc(*F)) { // Emitting block data inside of kernel. + uint32_t id = 0; + mMeta->setID(id); + mMeta->setKernel(true); + ++mBuffer; + if (isKernel) { + mMeta->printHeader(mKernelName); + if (isOpenCLKernel) + mMeta->processArgMetadata(O, mBuffer, isKernel); + mMeta->printMetaData(O, id, isKernel); + } + } +#endif + + SmallPtrSet FuncPvtVarsSet; + SmallPtrSet FuncGrpVarsSet; + for (const MachineBasicBlock &MBB : *MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isGlobal()) + continue; + + const GlobalVariable *GV = dyn_cast(MO.getGlobal()); + if (!GV) + continue; + + unsigned AS = GV->getType()->getAddressSpace(); + if (AS == HSAILAS::PRIVATE_ADDRESS) + FuncPvtVarsSet.insert(GV); + + if (AS == HSAILAS::GROUP_ADDRESS) + FuncGrpVarsSet.insert(GV); + } + } + } + + // Emit group variable declarations. + const Module *M = MF->getMMI().getModule(); + for (const GlobalVariable &GV : M->globals()) { + PointerType *Ty = GV.getType(); + unsigned AS = Ty->getAddressSpace(); + if (AS == HSAILAS::GROUP_ADDRESS) { + if (FuncGrpVarsSet.count(&GV)) { + std::string str; + O << '\t'; + + Type *InitTy = Ty->getElementType(); + + unsigned NElts = ~0u; + Type *EmitTy = HSAIL::analyzeType(InitTy, NElts, DL); + printAlignTypeQualifier(GV, DL, InitTy, EmitTy, NElts, true, O); + + O << getSegmentName(AS) << '_' << getArgTypeName(EmitTy) << " %" + << GV.getName(); + + if (NElts != 0) + O << '[' << NElts << ']'; + + O << ";\n"; + } + } + } + + O << '\n'; + + // Emit private variable declarations. + for (const GlobalVariable &GV : M->globals()) { + PointerType *Ty = GV.getType(); + unsigned AS = Ty->getAddressSpace(); + if (AS == HSAILAS::PRIVATE_ADDRESS) { + if (FuncPvtVarsSet.count(&GV)) { + StringRef GVname = GV.getName(); + bool ChangeName = false; + SmallVector NameParts; + const char *tmp_opt_name = "tmp_opt_var"; + std::string str; + if (GVname.empty()) { + str = tmp_opt_name; + ChangeName = true; + } else if (!isalpha(GVname[0]) && GVname[0] != '_') { + str = tmp_opt_name; + str.append(GVname); + ChangeName = true; + } + + { // replace all '.' 
with '_' + size_t pos = str.find('.'); + if (pos != std::string::npos) + ChangeName = true; + + while (pos != std::string::npos) { + str.replace(pos++, 1, "_"); + pos = str.find('.', pos); + } + } + + if (ChangeName) { + // FIXME + (const_cast(&GV))->setName(str); + } + + O << '\t'; + + Type *InitTy = Ty->getElementType(); + + unsigned NElts = ~0u; + Type *EmitTy = HSAIL::analyzeType(InitTy, NElts, DL); + + printAlignTypeQualifier(GV, DL, InitTy, EmitTy, NElts, true, O); + str = ""; + + O << '_' << getArgTypeName(EmitTy) << " %" << GV.getName(); + if (NElts != 0) + O << '[' << NElts << ']'; + + if (GV.hasInitializer() && canInitAddressSpace(AS)) { + O << " = "; + printGVInitialValue(GV, cast(GV.getInitializer()), DL, O); + } + } + } + } + + const MachineFrameInfo *MFI = MF->getFrameInfo(); + + uint64_t SpillSize, PrivateSize; + unsigned PrivateAlign, SpillAlign; + computeStackUsage(MFI, PrivateSize, PrivateAlign, SpillSize, SpillAlign); + + if (PrivateSize != 0) { + O << "\talign(" << PrivateAlign + << ") private_u8 %__privateStack[" << PrivateSize << "];\n"; + } + + if (SpillSize != 0) { + O << "\talign(" << SpillAlign + << ") spill_u8 %__spillStack[" << SpillSize << "];\n"; + } + + const HSAILMachineFunctionInfo *Info = MF->getInfo(); + if (Info->hasScavengerSpill()) + O << "\tspill_u32 %___spillScavenge;"; + + +#if 0 + // Allocate gcn region for gcn atomic counter, if required. + if (usesGCNAtomicCounter()) + O << "\tgcn_region_alloc 4;\n"; +#endif + + OutStreamer->EmitRawText(O.str()); +} + +void HSAILAsmPrinter::EmitFunctionBodyEnd() { OutStreamer->EmitRawText("};"); } + +void HSAILAsmPrinter::EmitInstruction(const MachineInstr *MI) { + HSAILMCInstLower MCInstLowering(OutContext, *this); + + MCInst TmpInst; + MCInstLowering.lower(MI, TmpInst); + EmitToStreamer(*OutStreamer, TmpInst); +} Index: lib/Target/HSAIL/HSAILAtomics.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILAtomics.td @@ -0,0 +1,114 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// atomics + +let mayLoad = 1, mayStore = 1, hasSideEffects = 1, HasDefaultSegment = 1 in { + def GCN_ATOMIC_APPEND_U32 : HSAILInstAddr_1Op<"gcn_atomic_append", BrigOpcode.GCNAPPEND, Inst_U32_U32>; + def GCN_ATOMIC_CONSUME_U32 : HSAILInstAddr_1Op<"gcn_atomic_consume", BrigOpcode.GCNCONSUME, Inst_U32_U32>; +} + +def : InstAddr_1Op_Pat< + GCN_ATOMIC_APPEND_U32, + int_HSAIL_gcn_atomic_append_u32, + BrigType.U32, + AddressSpace.REGION +>; + +def : InstAddr_1Op_Pat< + GCN_ATOMIC_CONSUME_U32, + int_HSAIL_gcn_atomic_consume_u32, + BrigType.U32, + AddressSpace.REGION +>; + +// We must define a new opcode for each atomic operation because we +// can't change the properties of the instruction based on the +// operation modifier operand. e.g. mayLoad must always be the same +// for different instances of the instruction. 
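+// For example, the _LD variants below clear mayStore and the noret _ST
+// variants clear mayLoad, which could not be expressed if a single opcode's
+// properties depended on the atomic operation operand.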
+multiclass InstAtomicOp brigVal> { + defm _ADD : InstAtomic_1Op_IntTypes; + defm _AND : InstAtomic_1Op_BitTypes; + defm _CAS : InstAtomic_2Op_BitTypes; + defm _EXCH : InstAtomic_1Op_BitTypes; + + let mayStore = 0 in { + defm _LD : InstAtomic_0Op_BitTypes; + } + + defm _MAX : InstAtomic_1Op_IntTypes; + defm _MIN : InstAtomic_1Op_IntTypes; + defm _OR : InstAtomic_1Op_BitTypes; + + defm _SUB : InstAtomic_1Op_IntTypes; + defm _WRAPDEC : InstAtomic_1Op_IntTypes; + defm _WRAPINC : InstAtomic_1Op_IntTypes; + defm _XOR : InstAtomic_1Op_BitTypes; +} + +let hasPostISelHook = 1 in { + defm ATOMIC : InstAtomicOp<"atomic", BrigOpcode.ATOMIC>; +} + +multiclass InstAtomicOp_NoRet brigVal> { + defm _ADD : InstAtomic_1Op_NoRet_IntTypes; + defm _AND : InstAtomic_1Op_NoRet_BitTypes; + defm _EXCH : InstAtomic_1Op_NoRet_BitTypes; + defm _MAX : InstAtomic_1Op_NoRet_IntTypes; + defm _MIN : InstAtomic_1Op_NoRet_IntTypes; + defm _OR : InstAtomic_1Op_NoRet_BitTypes; + + let mayLoad = 0 in { + defm _ST : InstAtomic_1Op_NoRet_BitTypes; + } + + defm _SUB : InstAtomic_1Op_NoRet_IntTypes; + defm _WRAPDEC : InstAtomic_1Op_NoRet_IntTypes; + defm _WRAPINC : InstAtomic_1Op_NoRet_IntTypes; + defm _XOR : InstAtomic_1Op_NoRet_BitTypes; +} + + +// atomic is replaced by atomicnoret in the post-isel hook if there +// are no uses of the returned value. +defm ATOMICNORET : InstAtomicOp_NoRet<"atomicnoret", BrigOpcode.ATOMICNORET>; + + +// def atomic_load_inc : PatFrag< +// (ops node:$ptr), +// (atomic_load_add node:$ptr, 1) +// >; + +// def atomic_load_dec : PatFrag< +// (ops node:$ptr), +// (atomic_load_sub node:$ptr, 1) +// >; + + +// let AddedComplexity = 1 in { +// defm : AtomicPat_0Op_IntTypes; +// defm : AtomicPat_0Op_IntTypes; +// } + +defm : AtomicPat_0Op_BitTypes<"ATOMIC_LD", atomic_load>; +defm : AtomicPat_1Op_NoRet_BitTypes<"ATOMICNORET_ST", atomic_store>; + +defm : AtomicPat_1Op_BitTypes<"ATOMIC_EXCH", atomic_swap>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_ADD", atomic_load_add, 1>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_SUB", atomic_load_sub, 1>; +defm : AtomicPat_1Op_BitTypes<"ATOMIC_AND", atomic_load_and>; +defm : AtomicPat_1Op_BitTypes<"ATOMIC_OR", atomic_load_or>; +defm : AtomicPat_1Op_BitTypes<"ATOMIC_XOR", atomic_load_xor>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_MIN", atomic_load_min, 1>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_MAX", atomic_load_max, 1>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_MIN", atomic_load_umin, 0>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_MAX", atomic_load_umax, 0>; + +defm : AtomicPat_2Op_BitTypes<"ATOMIC_CAS", atomic_cmp_swap>; Index: lib/Target/HSAIL/HSAILBrig.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILBrig.h @@ -0,0 +1,16 @@ +//===-- HSAILBrig.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILBRIG_H +#define LLVM_LIB_TARGET_HSAIL_HSAILBRIG_H + +#include "llvm/Support/DataTypes.h" +#include "libHSAIL/Brig.h" + +#endif Index: lib/Target/HSAIL/HSAILBrigDefs.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILBrigDefs.h @@ -0,0 +1,1130 @@ +//===-- BRIGEnums.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_BRIGENUMS_H +#define LLVM_LIB_TARGET_HSAIL_BRIGENUMS_H + +#if HSAIL_USE_LIBHSAIL +#include "HSAILBrig.h" +#else + +#include + +typedef uint32_t BrigVersion32_t; + +enum BrigVersion { + + //.nowrap + //.nodump + //.nollvm + + BRIG_VERSION_HSAIL_MAJOR = 1, + BRIG_VERSION_HSAIL_MINOR = 0, + BRIG_VERSION_BRIG_MAJOR = 1, + BRIG_VERSION_BRIG_MINOR = 0 +}; + +typedef uint8_t BrigAlignment8_t; //.defValue=BRIG_ALIGNMENT_NONE + +typedef uint8_t BrigAllocation8_t; //.defValue=BRIG_ALLOCATION_NONE + +typedef uint8_t BrigAluModifier8_t; + +typedef uint8_t BrigAtomicOperation8_t; + +typedef uint32_t BrigCodeOffset32_t; //.defValue=0 //.wtype=ItemRef + +typedef uint8_t BrigCompareOperation8_t; + +typedef uint16_t BrigControlDirective16_t; + +typedef uint32_t BrigDataOffset32_t; + +typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t; //.wtype=ListRef //.defValue=0 + +typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t; //.wtype=ListRef //.defValue=0 + +typedef BrigDataOffset32_t BrigDataOffsetString32_t; //.wtype=StrRef //.defValue=0 + +typedef uint8_t BrigExecutableModifier8_t; + +typedef uint8_t BrigImageChannelOrder8_t; //.defValue=BRIG_CHANNEL_ORDER_UNKNOWN + +typedef uint8_t BrigImageChannelType8_t; //.defValue=BRIG_CHANNEL_TYPE_UNKNOWN + +typedef uint8_t BrigImageGeometry8_t; //.defValue=BRIG_GEOMETRY_UNKNOWN + +typedef uint8_t BrigImageQuery8_t; + +typedef uint16_t BrigKind16_t; + +typedef uint8_t BrigLinkage8_t; //.defValue=BRIG_LINKAGE_NONE + +typedef uint8_t BrigMachineModel8_t; //.defValue=BRIG_MACHINE_LARGE + +typedef uint8_t BrigMemoryModifier8_t; + +typedef uint8_t BrigMemoryOrder8_t; //.defValue=BRIG_MEMORY_ORDER_RELAXED + +typedef uint8_t BrigMemoryScope8_t; //.defValue=BRIG_MEMORY_SCOPE_SYSTEM + +typedef uint16_t BrigOpcode16_t; + +typedef uint32_t BrigOperandOffset32_t; //.defValue=0 //.wtype=ItemRef + +typedef uint8_t BrigPack8_t; //.defValue=BRIG_PACK_NONE + +typedef uint8_t BrigProfile8_t; //.defValue=BRIG_PROFILE_FULL + +typedef uint16_t BrigRegisterKind16_t; + +typedef uint8_t BrigRound8_t; //.defValue=BRIG_ROUND_NONE + +typedef uint8_t BrigSamplerAddressing8_t; //.defValue=BRIG_ADDRESSING_CLAMP_TO_EDGE + +typedef uint8_t BrigSamplerCoordNormalization8_t; + +typedef uint8_t BrigSamplerFilter8_t; + +typedef uint8_t BrigSamplerQuery8_t; + +typedef uint32_t BrigSectionIndex32_t; + +typedef uint8_t BrigSegCvtModifier8_t; + +typedef uint8_t BrigSegment8_t; //.defValue=BRIG_SEGMENT_NONE + +typedef uint32_t BrigStringOffset32_t; //.defValue=0 //.wtype=StrRef + +typedef uint16_t BrigType16_t; + +typedef uint8_t BrigVariableModifier8_t; + +typedef uint8_t BrigWidth8_t; + +typedef uint32_t BrigExceptions32_t; + +enum BrigKind { + + //.nollvm + // + //.wname={ s/^BRIG_KIND//; MACRO2Name($_) } + 
//.mnemo=$wname{ $wname } + // + //.sizeof=$wname{ "sizeof(".$structs->{"Brig".$wname}->{rawbrig}.")" } + //.sizeof_switch //.sizeof_proto="int size_of_brig_record(unsigned arg)" //.sizeof_default="return -1" + // + //.isBodyOnly={ "false" } + //.isBodyOnly_switch //.isBodyOnly_proto="bool isBodyOnly(Directive d)" //.isBodyOnly_arg="d.kind()" + //.isBodyOnly_default="assert(false); return false" + // + //.isToplevelOnly={ "false" } + //.isToplevelOnly_switch //.isToplevelOnly_proto="bool isToplevelOnly(Directive d)" //.isToplevelOnly_arg="d.kind()" + //.isToplevelOnly_default="assert(false); return false" + + BRIG_KIND_NONE = 0x0000, //.skip + + BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, //.skip + BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_COMMENT = 0x1002, + BRIG_KIND_DIRECTIVE_CONTROL = 0x1003, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005, + BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_KERNEL = 0x1008, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_LABEL = 0x1009, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_LOC = 0x100a, + BRIG_KIND_DIRECTIVE_MODULE = 0x100b, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c, + BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e, + BRIG_KIND_DIRECTIVE_END = 0x100f, //.skip + + BRIG_KIND_INST_BEGIN = 0x2000, //.skip + BRIG_KIND_INST_ADDR = 0x2000, + BRIG_KIND_INST_ATOMIC = 0x2001, + BRIG_KIND_INST_BASIC = 0x2002, + BRIG_KIND_INST_BR = 0x2003, + BRIG_KIND_INST_CMP = 0x2004, + BRIG_KIND_INST_CVT = 0x2005, + BRIG_KIND_INST_IMAGE = 0x2006, + BRIG_KIND_INST_LANE = 0x2007, + BRIG_KIND_INST_MEM = 0x2008, + BRIG_KIND_INST_MEM_FENCE = 0x2009, + BRIG_KIND_INST_MOD = 0x200a, + BRIG_KIND_INST_QUERY_IMAGE = 0x200b, + BRIG_KIND_INST_QUERY_SAMPLER = 0x200c, + BRIG_KIND_INST_QUEUE = 0x200d, + BRIG_KIND_INST_SEG = 0x200e, + BRIG_KIND_INST_SEG_CVT = 0x200f, + BRIG_KIND_INST_SIGNAL = 0x2010, + BRIG_KIND_INST_SOURCE_TYPE = 0x2011, + BRIG_KIND_INST_END = 0x2012, //.skip + + BRIG_KIND_OPERAND_BEGIN = 0x3000, //.skip + BRIG_KIND_OPERAND_ADDRESS = 0x3000, + BRIG_KIND_OPERAND_ALIGN = 0x3001, + BRIG_KIND_OPERAND_CODE_LIST = 0x3002, + BRIG_KIND_OPERAND_CODE_REF = 0x3003, + BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004, + BRIG_KIND_OPERAND_RESERVED = 0x3005, //.skip + BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006, + BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007, + BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008, + BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009, + BRIG_KIND_OPERAND_REGISTER = 0x300a, + BRIG_KIND_OPERAND_STRING = 0x300b, + BRIG_KIND_OPERAND_WAVESIZE = 0x300c, + BRIG_KIND_OPERAND_END = 0x300d //.skip +}; + +enum BrigAlignment { + + //.mnemo={ s/^BRIG_ALIGNMENT_//; lc } + //.mnemo_proto="const char* align2str(unsigned arg)" + // + //.bytes={ /(\d+)/ ? $1 : undef } + //.bytes_switch //.bytes_proto="unsigned align2num(unsigned arg)" //.bytes_default="assert(false); return -1" + // + //.rbytes=$bytes{ $bytes } + //.rbytes_switch //.rbytes_reverse //.rbytes_proto="BrigAlignment num2align(uint64_t arg)" + //.rbytes_default="return BRIG_ALIGNMENT_LAST" + // + //.print=$bytes{ $bytes>1 ? 
"_align($bytes)" : "" } + + BRIG_ALIGNMENT_NONE = 0, //.no_mnemo + BRIG_ALIGNMENT_1 = 1, //.mnemo="" + BRIG_ALIGNMENT_2 = 2, + BRIG_ALIGNMENT_4 = 3, + BRIG_ALIGNMENT_8 = 4, + BRIG_ALIGNMENT_16 = 5, + BRIG_ALIGNMENT_32 = 6, + BRIG_ALIGNMENT_64 = 7, + BRIG_ALIGNMENT_128 = 8, + BRIG_ALIGNMENT_256 = 9, + + BRIG_ALIGNMENT_LAST, //.skip + BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1 //.skip +}; + +enum BrigAllocation { + + //.mnemo={ s/^BRIG_ALLOCATION_//;lc } + //.mnemo_token=EAllocKind + + BRIG_ALLOCATION_NONE = 0, //.mnemo="" + BRIG_ALLOCATION_PROGRAM = 1, + BRIG_ALLOCATION_AGENT = 2, + BRIG_ALLOCATION_AUTOMATIC = 3 +}; + +enum BrigAluModifierMask { + BRIG_ALU_FTZ = 1 +}; + +enum BrigAtomicOperation { + + //.tdcaption="Atomic Operations" + // + //.mnemo={ s/^BRIG_ATOMIC_//;lc } + //.mnemo_token=_EMAtomicOp + //.mnemo_context=EInstModifierInstAtomicContext + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_ATOMIC_ADD = 0, + BRIG_ATOMIC_AND = 1, + BRIG_ATOMIC_CAS = 2, + BRIG_ATOMIC_EXCH = 3, + BRIG_ATOMIC_LD = 4, + BRIG_ATOMIC_MAX = 5, + BRIG_ATOMIC_MIN = 6, + BRIG_ATOMIC_OR = 7, + BRIG_ATOMIC_ST = 8, + BRIG_ATOMIC_SUB = 9, + BRIG_ATOMIC_WRAPDEC = 10, + BRIG_ATOMIC_WRAPINC = 11, + BRIG_ATOMIC_XOR = 12, + BRIG_ATOMIC_WAIT_EQ = 13, + BRIG_ATOMIC_WAIT_NE = 14, + BRIG_ATOMIC_WAIT_LT = 15, + BRIG_ATOMIC_WAIT_GTE = 16, + BRIG_ATOMIC_WAITTIMEOUT_EQ = 17, + BRIG_ATOMIC_WAITTIMEOUT_NE = 18, + BRIG_ATOMIC_WAITTIMEOUT_LT = 19, + BRIG_ATOMIC_WAITTIMEOUT_GTE = 20 +}; + +enum BrigCompareOperation { + + //.tdcaption="Comparison Operators" + // + //.mnemo={ s/^BRIG_COMPARE_//;lc } + //.mnemo_token=_EMCompare + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_COMPARE_EQ = 0, + BRIG_COMPARE_NE = 1, + BRIG_COMPARE_LT = 2, + BRIG_COMPARE_LE = 3, + BRIG_COMPARE_GT = 4, + BRIG_COMPARE_GE = 5, + BRIG_COMPARE_EQU = 6, + BRIG_COMPARE_NEU = 7, + BRIG_COMPARE_LTU = 8, + BRIG_COMPARE_LEU = 9, + BRIG_COMPARE_GTU = 10, + BRIG_COMPARE_GEU = 11, + BRIG_COMPARE_NUM = 12, + BRIG_COMPARE_NAN = 13, + BRIG_COMPARE_SEQ = 14, + BRIG_COMPARE_SNE = 15, + BRIG_COMPARE_SLT = 16, + BRIG_COMPARE_SLE = 17, + BRIG_COMPARE_SGT = 18, + BRIG_COMPARE_SGE = 19, + BRIG_COMPARE_SGEU = 20, + BRIG_COMPARE_SEQU = 21, + BRIG_COMPARE_SNEU = 22, + BRIG_COMPARE_SLTU = 23, + BRIG_COMPARE_SLEU = 24, + BRIG_COMPARE_SNUM = 25, + BRIG_COMPARE_SNAN = 26, + BRIG_COMPARE_SGTU = 27 +}; + +enum BrigControlDirective { + + //.mnemo={ s/^BRIG_CONTROL_//;lc } + //.mnemo_token=EControl + // + //.print=$mnemo{ $mnemo } + + BRIG_CONTROL_NONE = 0, //.skip + BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1, + BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2, + BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3, + BRIG_CONTROL_MAXFLATGRIDSIZE = 4, + BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5, + BRIG_CONTROL_REQUIREDDIM = 6, + BRIG_CONTROL_REQUIREDGRIDSIZE = 7, + BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8, + BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9 +}; + +enum BrigExecutableModifierMask { + //.nodump + BRIG_EXECUTABLE_DEFINITION = 1 +}; + +enum BrigImageChannelOrder { + + //.mnemo={ s/^BRIG_CHANNEL_ORDER_?//;lc } + //.mnemo_token=EImageOrder + //.mnemo_context=EImageOrderContext + // + //.print=$mnemo{ $mnemo } + + BRIG_CHANNEL_ORDER_A = 0, + BRIG_CHANNEL_ORDER_R = 1, + BRIG_CHANNEL_ORDER_RX = 2, + BRIG_CHANNEL_ORDER_RG = 3, + BRIG_CHANNEL_ORDER_RGX = 4, + BRIG_CHANNEL_ORDER_RA = 5, + BRIG_CHANNEL_ORDER_RGB = 6, + BRIG_CHANNEL_ORDER_RGBX = 7, + BRIG_CHANNEL_ORDER_RGBA = 8, + BRIG_CHANNEL_ORDER_BGRA = 9, + BRIG_CHANNEL_ORDER_ARGB = 10, + BRIG_CHANNEL_ORDER_ABGR = 11, + BRIG_CHANNEL_ORDER_SRGB = 12, + BRIG_CHANNEL_ORDER_SRGBX 
= 13, + BRIG_CHANNEL_ORDER_SRGBA = 14, + BRIG_CHANNEL_ORDER_SBGRA = 15, + BRIG_CHANNEL_ORDER_INTENSITY = 16, + BRIG_CHANNEL_ORDER_LUMINANCE = 17, + BRIG_CHANNEL_ORDER_DEPTH = 18, + BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19, + + // used internally + BRIG_CHANNEL_ORDER_UNKNOWN, //.mnemo="" // used when no order is specified + + BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 //.skip + +}; + +enum BrigImageChannelType { + + //.mnemo={ s/^BRIG_CHANNEL_TYPE_//;lc } + //.mnemo_token=EImageFormat + // + //.print=$mnemo{ $mnemo } + + BRIG_CHANNEL_TYPE_SNORM_INT8 = 0, + BRIG_CHANNEL_TYPE_SNORM_INT16 = 1, + BRIG_CHANNEL_TYPE_UNORM_INT8 = 2, + BRIG_CHANNEL_TYPE_UNORM_INT16 = 3, + BRIG_CHANNEL_TYPE_UNORM_INT24 = 4, + BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5, + BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6, + BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7, + BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8, + BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9, + BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10, + BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11, + BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12, + BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13, + BRIG_CHANNEL_TYPE_HALF_FLOAT = 14, + BRIG_CHANNEL_TYPE_FLOAT = 15, + + // used internally + BRIG_CHANNEL_TYPE_UNKNOWN, //.mnemo="" + + BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigImageGeometry { + + //.tdcaption="Geometry" + // + //.mnemo={ s/^BRIG_GEOMETRY_//;lc } + //.mnemo_token=EImageGeometry + // + //.dim={/_([0-9]+D)(A)?/ ? $1+(defined $2?1:0) : undef} + //.dim_switch //.dim_proto="unsigned getBrigGeometryDim(unsigned geo)" //.dim_arg="geo" + //.dim_default="assert(0); return 0" + // + //.depth={/DEPTH$/?"true":"false"} + //.depth_switch //.depth_proto="bool isBrigGeometryDepth(unsigned geo)" //.depth_arg="geo" + //.depth_default="return false" + + BRIG_GEOMETRY_1D = 0, + BRIG_GEOMETRY_2D = 1, + BRIG_GEOMETRY_3D = 2, + BRIG_GEOMETRY_1DA = 3, + BRIG_GEOMETRY_2DA = 4, + BRIG_GEOMETRY_1DB = 5, + BRIG_GEOMETRY_2DDEPTH = 6, + BRIG_GEOMETRY_2DADEPTH = 7, + + // used internally + BRIG_GEOMETRY_UNKNOWN, //.mnemo="" + + BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigImageQuery { + + //.mnemo={ s/^BRIG_IMAGE_QUERY_//;lc } + // + //.print=$mnemo{ $mnemo } + + BRIG_IMAGE_QUERY_WIDTH = 0, + BRIG_IMAGE_QUERY_HEIGHT = 1, + BRIG_IMAGE_QUERY_DEPTH = 2, + BRIG_IMAGE_QUERY_ARRAY = 3, + BRIG_IMAGE_QUERY_CHANNELORDER = 4, + BRIG_IMAGE_QUERY_CHANNELTYPE = 5 +}; + +enum BrigLinkage { + + //.mnemo={ s/^BRIG_LINKAGE_//;s/NONE//;lc } + + BRIG_LINKAGE_NONE = 0, + BRIG_LINKAGE_PROGRAM = 1, + BRIG_LINKAGE_MODULE = 2, + BRIG_LINKAGE_FUNCTION = 3, + BRIG_LINKAGE_ARG = 4 +}; + +enum BrigMachineModel { + + //.mnemo={ s/^BRIG_MACHINE_//; '$'.lc } + //.mnemo_token=ETargetMachine + // + //.print=$mnemo{ $mnemo } + + BRIG_MACHINE_SMALL = 0, + BRIG_MACHINE_LARGE = 1, + + BRIG_MACHINE_UNDEF = 2 //.skip +}; + +enum BrigMemoryModifierMask { //.tddef=0 + BRIG_MEMORY_CONST = 1 +}; + +enum BrigMemoryOrder { + + //.mnemo={ s/^BRIG_MEMORY_ORDER_//; lc } + //.mnemo_token=_EMMemoryOrder + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_MEMORY_ORDER_NONE = 0, //.mnemo="" + BRIG_MEMORY_ORDER_RELAXED = 1, //.mnemo=rlx + BRIG_MEMORY_ORDER_SC_ACQUIRE = 2, //.mnemo=scacq + BRIG_MEMORY_ORDER_SC_RELEASE = 3, //.mnemo=screl + BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4, //.mnemo=scar + + BRIG_MEMORY_ORDER_LAST = 5 //.skip +}; + +enum BrigMemoryScope { + + //.mnemo={ s/^BRIG_MEMORY_SCOPE_//; lc } + //.mnemo_token=_EMMemoryScope + // + //.print=$mnemo{ $mnemo } + + BRIG_MEMORY_SCOPE_NONE = 0, //.mnemo="" + BRIG_MEMORY_SCOPE_WORKITEM = 1, //.mnemo="" + 
BRIG_MEMORY_SCOPE_WAVEFRONT = 2, //.mnemo=wave + BRIG_MEMORY_SCOPE_WORKGROUP = 3, //.mnemo=wg + BRIG_MEMORY_SCOPE_AGENT = 4, //.mnemo=agent + BRIG_MEMORY_SCOPE_SYSTEM = 5, //.mnemo=system + + BRIG_MEMORY_SCOPE_LAST = 6 //.skip +}; + +enum BrigOpcode { + + //.tdcaption="Instruction Opcodes" + // + //.k={ "BASIC" } + //.pscode=$k{ MACRO2Name("_".$k) } + //.opcodeparser=$pscode{ return $pscode && "parseMnemo$pscode" } + //.opcodeparser_incfile=ParserUtilities + //.opcodeparser_switch //.opcodeparser_proto="OpcodeParser getOpcodeParser(BrigOpcode16_t arg)" //.opcodeparser_default="return parseMnemoBasic" + // + //.psopnd={undef} + //.opndparser=$psopnd{ return $psopnd && "&Parser::parse$psopnd" } + //.opndparser_incfile=ParserUtilities + //.opndparser_switch //.opndparser_proto="Parser::OperandParser Parser::getOperandParser(BrigOpcode16_t arg)" //.opndparser_default="return &Parser::parseOperands" + // + //.mnemo={ s/^BRIG_OPCODE_//; s/GCN([^_])/GCN_$1/; lc } + //.mnemo_scanner=Instructions //.mnemo_token=EInstruction + //.mnemo_context=EDefaultContext + // + //.has_memory_order={undef} + //.semsupport=$has_memory_order{ return $has_memory_order && "true" } + // + //.hasType=$k{ return ($k and $k eq "BASIC_NO_TYPE") ? "false" : undef; } + //.hasType_switch //.hasType_proto="bool instHasType(BrigOpcode16_t arg)" //.hasType_default="return true" + // + //.opcodevis=$pscode{ s/^BRIG_OPCODE_//; sprintf("%-47s(","vis.visitOpcode_".$_) . ($pscode =~m/^(BasicOrMod|Nop)$/? "inst" : "HSAIL_ASM::Inst". ($pscode=~m/BasicNoType/? "Basic":$pscode) ."(inst)").")" } + //.opcodevis_switch //.opcodevis_proto="template RetType visitOpcode_gen(HSAIL_ASM::Inst inst, Visitor& vis)" + //.opcodevis_arg="inst.opcode()" //.opcodevis_default="return RetType()" + //.opcodevis_incfile=ItemUtils + // + //.ftz=$k{ return ($k eq "BASIC_OR_MOD" or $k eq "CMP" or $k eq "CVT") ? 
"true" : undef } + //.ftz_incfile=ItemUtils //.ftz_switch //.ftz_proto="inline bool instSupportsFtz(BrigOpcode16_t arg)" //.ftz_default="return false" + // + //.vecOpndIndex={undef} + //.vecOpndIndex_switch //.vecOpndIndex_proto="int vecOpndIndex(BrigOpcode16_t arg)" //.vecOpndIndex_default="return -1" + //.vecOpndIndex_incfile=ParserUtilities + // + //.numdst={undef} + //.numdst_switch //.numdst_proto="int instNumDstOperands(BrigOpcode16_t arg)" //.numdst_default="return 1" + // + //.print=$mnemo{ $mnemo } + + BRIG_OPCODE_NOP = 0, //.k=NOP //.hasType=false + BRIG_OPCODE_ABS = 1, //.k=BASIC_OR_MOD + BRIG_OPCODE_ADD = 2, //.k=BASIC_OR_MOD + BRIG_OPCODE_BORROW = 3, + BRIG_OPCODE_CARRY = 4, + BRIG_OPCODE_CEIL = 5, //.k=BASIC_OR_MOD + BRIG_OPCODE_COPYSIGN = 6, //.k=BASIC_OR_MOD + BRIG_OPCODE_DIV = 7, //.k=BASIC_OR_MOD + BRIG_OPCODE_FLOOR = 8, //.k=BASIC_OR_MOD + BRIG_OPCODE_FMA = 9, //.k=BASIC_OR_MOD + BRIG_OPCODE_FRACT = 10, //.k=BASIC_OR_MOD + BRIG_OPCODE_MAD = 11, //.k=BASIC_OR_MOD + BRIG_OPCODE_MAX = 12, //.k=BASIC_OR_MOD + BRIG_OPCODE_MIN = 13, //.k=BASIC_OR_MOD + BRIG_OPCODE_MUL = 14, //.k=BASIC_OR_MOD + BRIG_OPCODE_MULHI = 15, //.k=BASIC_OR_MOD + BRIG_OPCODE_NEG = 16, //.k=BASIC_OR_MOD + BRIG_OPCODE_REM = 17, + BRIG_OPCODE_RINT = 18, //.k=BASIC_OR_MOD + BRIG_OPCODE_SQRT = 19, //.k=BASIC_OR_MOD + BRIG_OPCODE_SUB = 20, //.k=BASIC_OR_MOD + BRIG_OPCODE_TRUNC = 21, //.k=BASIC_OR_MOD + BRIG_OPCODE_MAD24 = 22, + BRIG_OPCODE_MAD24HI = 23, + BRIG_OPCODE_MUL24 = 24, + BRIG_OPCODE_MUL24HI = 25, + BRIG_OPCODE_SHL = 26, + BRIG_OPCODE_SHR = 27, + BRIG_OPCODE_AND = 28, + BRIG_OPCODE_NOT = 29, + BRIG_OPCODE_OR = 30, + BRIG_OPCODE_POPCOUNT = 31, //.k=SOURCE_TYPE + BRIG_OPCODE_XOR = 32, + BRIG_OPCODE_BITEXTRACT = 33, + BRIG_OPCODE_BITINSERT = 34, + BRIG_OPCODE_BITMASK = 35, + BRIG_OPCODE_BITREV = 36, + BRIG_OPCODE_BITSELECT = 37, + BRIG_OPCODE_FIRSTBIT = 38, //.k=SOURCE_TYPE + BRIG_OPCODE_LASTBIT = 39, //.k=SOURCE_TYPE + BRIG_OPCODE_COMBINE = 40, //.k=SOURCE_TYPE //.vecOpndIndex=1 + BRIG_OPCODE_EXPAND = 41, //.k=SOURCE_TYPE //.vecOpndIndex=0 + BRIG_OPCODE_LDA = 42, //.k=ADDR + BRIG_OPCODE_MOV = 43, + BRIG_OPCODE_SHUFFLE = 44, + BRIG_OPCODE_UNPACKHI = 45, + BRIG_OPCODE_UNPACKLO = 46, + BRIG_OPCODE_PACK = 47, //.k=SOURCE_TYPE + BRIG_OPCODE_UNPACK = 48, //.k=SOURCE_TYPE + BRIG_OPCODE_CMOV = 49, + BRIG_OPCODE_CLASS = 50, //.k=SOURCE_TYPE + BRIG_OPCODE_NCOS = 51, + BRIG_OPCODE_NEXP2 = 52, + BRIG_OPCODE_NFMA = 53, + BRIG_OPCODE_NLOG2 = 54, + BRIG_OPCODE_NRCP = 55, + BRIG_OPCODE_NRSQRT = 56, + BRIG_OPCODE_NSIN = 57, + BRIG_OPCODE_NSQRT = 58, + BRIG_OPCODE_BITALIGN = 59, + BRIG_OPCODE_BYTEALIGN = 60, + BRIG_OPCODE_PACKCVT = 61, //.k=SOURCE_TYPE + BRIG_OPCODE_UNPACKCVT = 62, //.k=SOURCE_TYPE + BRIG_OPCODE_LERP = 63, + BRIG_OPCODE_SAD = 64, //.k=SOURCE_TYPE + BRIG_OPCODE_SADHI = 65, //.k=SOURCE_TYPE + BRIG_OPCODE_SEGMENTP = 66, //.k=SEG_CVT + BRIG_OPCODE_FTOS = 67, //.k=SEG_CVT + BRIG_OPCODE_STOF = 68, //.k=SEG_CVT + BRIG_OPCODE_CMP = 69, //.k=CMP + BRIG_OPCODE_CVT = 70, //.k=CVT + BRIG_OPCODE_LD = 71, //.k=MEM //.has_memory_order //.vecOpndIndex=0 + BRIG_OPCODE_ST = 72, //.k=MEM //.has_memory_order //.vecOpndIndex=0 //.numdst=0 + BRIG_OPCODE_ATOMIC = 73, //.k=ATOMIC + BRIG_OPCODE_ATOMICNORET = 74, //.k=ATOMIC //.numdst=0 + BRIG_OPCODE_SIGNAL = 75, //.k=SIGNAL + BRIG_OPCODE_SIGNALNORET = 76, //.k=SIGNAL //.numdst=0 + BRIG_OPCODE_MEMFENCE = 77, //.k=MEM_FENCE //.numdst=0 + BRIG_OPCODE_RDIMAGE = 78, //.k=IMAGE //.vecOpndIndex=0 + BRIG_OPCODE_LDIMAGE = 79, //.k=IMAGE //.vecOpndIndex=0 + BRIG_OPCODE_STIMAGE = 80, 
//.k=IMAGE //.vecOpndIndex=0 //.numdst=0 + BRIG_OPCODE_IMAGEFENCE = 81, //.k=BASIC_NO_TYPE + BRIG_OPCODE_QUERYIMAGE = 82, //.k=QUERY_IMAGE + BRIG_OPCODE_QUERYSAMPLER = 83, //.k=QUERY_SAMPLER + BRIG_OPCODE_CBR = 84, //.k=BR //.numdst=0 + BRIG_OPCODE_BR = 85, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_SBR = 86, //.k=BR //.numdst=0 //.psopnd=SbrOperands + BRIG_OPCODE_BARRIER = 87, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_WAVEBARRIER = 88, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_ARRIVEFBAR = 89, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_INITFBAR = 90, //.k=BASIC_NO_TYPE //.numdst=0 //.hasType=false + BRIG_OPCODE_JOINFBAR = 91, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_LEAVEFBAR = 92, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_RELEASEFBAR = 93, //.k=BASIC_NO_TYPE //.numdst=0 + BRIG_OPCODE_WAITFBAR = 94, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_LDF = 95, + BRIG_OPCODE_ACTIVELANECOUNT = 96, //.k=LANE + BRIG_OPCODE_ACTIVELANEID = 97, //.k=LANE + BRIG_OPCODE_ACTIVELANEMASK = 98, //.k=LANE //.vecOpndIndex=0 + BRIG_OPCODE_ACTIVELANEPERMUTE = 99, //.k=LANE + BRIG_OPCODE_CALL = 100, //.k=BR //.psopnd=CallOperands //.numdst=0 //.hasType=false + BRIG_OPCODE_SCALL = 101, //.k=BR //.psopnd=CallOperands //.numdst=0 + BRIG_OPCODE_ICALL = 102, //.k=BR //.psopnd=CallOperands //.numdst=0 + BRIG_OPCODE_RET = 103, //.k=BASIC_NO_TYPE + BRIG_OPCODE_ALLOCA = 104, //.k=MEM + BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105, + BRIG_OPCODE_CURRENTWORKITEMFLATID = 106, + BRIG_OPCODE_DIM = 107, + BRIG_OPCODE_GRIDGROUPS = 108, + BRIG_OPCODE_GRIDSIZE = 109, + BRIG_OPCODE_PACKETCOMPLETIONSIG = 110, + BRIG_OPCODE_PACKETID = 111, + BRIG_OPCODE_WORKGROUPID = 112, + BRIG_OPCODE_WORKGROUPSIZE = 113, + BRIG_OPCODE_WORKITEMABSID = 114, + BRIG_OPCODE_WORKITEMFLATABSID = 115, + BRIG_OPCODE_WORKITEMFLATID = 116, + BRIG_OPCODE_WORKITEMID = 117, + BRIG_OPCODE_CLEARDETECTEXCEPT = 118, //.numdst=0 + BRIG_OPCODE_GETDETECTEXCEPT = 119, + BRIG_OPCODE_SETDETECTEXCEPT = 120, //.numdst=0 + BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121, //.k=QUEUE + BRIG_OPCODE_CASQUEUEWRITEINDEX = 122, //.k=QUEUE + BRIG_OPCODE_LDQUEUEREADINDEX = 123, //.k=QUEUE + BRIG_OPCODE_LDQUEUEWRITEINDEX = 124, //.k=QUEUE + BRIG_OPCODE_STQUEUEREADINDEX = 125, //.k=QUEUE //.numdst=0 + BRIG_OPCODE_STQUEUEWRITEINDEX = 126, //.k=QUEUE //.numdst=0 + BRIG_OPCODE_CLOCK = 127, + BRIG_OPCODE_CUID = 128, + BRIG_OPCODE_DEBUGTRAP = 129, //.numdst=0 + BRIG_OPCODE_GROUPBASEPTR = 130, + BRIG_OPCODE_KERNARGBASEPTR = 131, + BRIG_OPCODE_LANEID = 132, + BRIG_OPCODE_MAXCUID = 133, + BRIG_OPCODE_MAXWAVEID = 134, + BRIG_OPCODE_NULLPTR = 135, //.k=SEG + BRIG_OPCODE_WAVEID = 136, + BRIG_OPCODE_FIRST_USER_DEFINED = 32768, //.skip + + BRIG_OPCODE_GCNMADU = (1u << 15) | 0, //.k=BASIC_NO_TYPE + BRIG_OPCODE_GCNMADS = (1u << 15) | 1, //.k=BASIC_NO_TYPE + BRIG_OPCODE_GCNMAX3 = (1u << 15) | 2, + BRIG_OPCODE_GCNMIN3 = (1u << 15) | 3, + BRIG_OPCODE_GCNMED3 = (1u << 15) | 4, + BRIG_OPCODE_GCNFLDEXP = (1u << 15) | 5, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNFREXP_EXP = (1u << 15) | 6, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNFREXP_MANT = (1u << 15) | 7, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNTRIG_PREOP = (1u << 15) | 8, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNBFM = (1u << 15) | 9, + BRIG_OPCODE_GCNLD = (1u << 15) | 10, //.k=MEM //.has_memory_order //.vecOpndIndex=0 + BRIG_OPCODE_GCNST = (1u << 15) | 11, //.k=MEM //.has_memory_order //.vecOpndIndex=0 + BRIG_OPCODE_GCNATOMIC = (1u << 15) | 12, //.k=ATOMIC + BRIG_OPCODE_GCNATOMICNORET = (1u << 15) | 13, //.k=ATOMIC 
//.mnemo=gcn_atomicNoRet + BRIG_OPCODE_GCNSLEEP = (1u << 15) | 14, + BRIG_OPCODE_GCNPRIORITY = (1u << 15) | 15, + BRIG_OPCODE_GCNREGIONALLOC = (1u << 15) | 16, //.k=BASIC_NO_TYPE //.mnemo=gcn_region_alloc + BRIG_OPCODE_GCNMSAD = (1u << 15) | 17, + BRIG_OPCODE_GCNQSAD = (1u << 15) | 18, + BRIG_OPCODE_GCNMQSAD = (1u << 15) | 19, + BRIG_OPCODE_GCNMQSAD4 = (1u << 15) | 20, //.k=BASIC_NO_TYPE + BRIG_OPCODE_GCNSADW = (1u << 15) | 21, + BRIG_OPCODE_GCNSADD = (1u << 15) | 22, + BRIG_OPCODE_GCNCONSUME = (1u << 15) | 23, //.k=ADDR //.mnemo=gcn_atomic_consume + BRIG_OPCODE_GCNAPPEND = (1u << 15) | 24, //.k=ADDR //.mnemo=gcn_atomic_append + BRIG_OPCODE_GCNB4XCHG = (1u << 15) | 25, //.mnemo=gcn_b4xchg + BRIG_OPCODE_GCNB32XCHG = (1u << 15) | 26, //.mnemo=gcn_b32xchg + BRIG_OPCODE_GCNMAX = (1u << 15) | 27, + BRIG_OPCODE_GCNMIN = (1u << 15) | 28, + BRIG_OPCODE_GCNDIVRELAXED = (1u << 15) | 29, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNDIVRELAXEDNARROW = (1u << 15) | 30, +}; + +enum BrigPack { + + //.tdcaption="Packing" + // + //.mnemo={ s/^BRIG_PACK_//;s/SAT$/_sat/;lc } + //.mnemo_token=_EMPacking + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_PACK_NONE = 0, //.mnemo="" + BRIG_PACK_PP = 1, + BRIG_PACK_PS = 2, + BRIG_PACK_SP = 3, + BRIG_PACK_SS = 4, + BRIG_PACK_S = 5, + BRIG_PACK_P = 6, + BRIG_PACK_PPSAT = 7, + BRIG_PACK_PSSAT = 8, + BRIG_PACK_SPSAT = 9, + BRIG_PACK_SSSAT = 10, + BRIG_PACK_SSAT = 11, + BRIG_PACK_PSAT = 12 +}; + +enum BrigProfile { + + //.mnemo={ s/^BRIG_PROFILE_//;'$'.lc } + //.mnemo_token=ETargetProfile + // + //.print=$mnemo{ $mnemo } + + BRIG_PROFILE_BASE = 0, + BRIG_PROFILE_FULL = 1, + + BRIG_PROFILE_UNDEF = 2 //.skip +}; + +enum BrigRegisterKind { + + //.mnemo={ s/^BRIG_REGISTER_KIND_//;'$'.lc(substr($_,0,1)) } + // + //.bits={ } + //.bits_switch //.bits_proto="unsigned getRegBits(BrigRegisterKind16_t arg)" //.bits_default="return (unsigned)-1" + // + //.nollvm + + BRIG_REGISTER_KIND_CONTROL = 0, //.bits=1 + BRIG_REGISTER_KIND_SINGLE = 1, //.bits=32 + BRIG_REGISTER_KIND_DOUBLE = 2, //.bits=64 + BRIG_REGISTER_KIND_QUAD = 3 //.bits=128 +}; + +enum BrigRound { + + //.mnemo={} + //.mnemo_fn=round2str //.mnemo_token=_EMRound + // + //.sat={/_SAT$/? "true" : "false"} + //.sat_switch //.sat_proto="bool isSatRounding(unsigned rounding)" //.sat_arg="rounding" + //.sat_default="return false" + // + //.sig={/_SIGNALING_/? "true" : "false"} + //.sig_switch //.sig_proto="bool isSignalingRounding(unsigned rounding)" //.sig_arg="rounding" + //.sig_default="return false" + // + //.int={/_INTEGER_/? "true" : "false"} + //.int_switch //.int_proto="bool isIntRounding(unsigned rounding)" //.int_arg="rounding" + //.int_default="return false" + // + //.flt={/_FLOAT_/? 
"true" : "false"} + //.flt_switch //.flt_proto="bool isFloatRounding(unsigned rounding)" //.flt_arg="rounding" + //.flt_default="return false" + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_ROUND_NONE = 0, //.no_mnemo + BRIG_ROUND_FLOAT_DEFAULT = 1, //.no_mnemo + BRIG_ROUND_FLOAT_NEAR_EVEN = 2, //.mnemo=near + BRIG_ROUND_FLOAT_ZERO = 3, //.mnemo=zero + BRIG_ROUND_FLOAT_PLUS_INFINITY = 4, //.mnemo=up + BRIG_ROUND_FLOAT_MINUS_INFINITY = 5, //.mnemo=down + BRIG_ROUND_INTEGER_NEAR_EVEN = 6, //.mnemo=neari + BRIG_ROUND_INTEGER_ZERO = 7, //.mnemo=zeroi + BRIG_ROUND_INTEGER_PLUS_INFINITY = 8, //.mnemo=upi + BRIG_ROUND_INTEGER_MINUS_INFINITY = 9, //.mnemo=downi + BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10, //.mnemo=neari_sat + BRIG_ROUND_INTEGER_ZERO_SAT = 11, //.mnemo=zeroi_sat + BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12, //.mnemo=upi_sat + BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13, //.mnemo=downi_sat + BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14, //.mnemo=sneari + BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15, //.mnemo=szeroi + BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16, //.mnemo=supi + BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17, //.mnemo=sdowni + BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18, //.mnemo=sneari_sat + BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19, //.mnemo=szeroi_sat + BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20, //.mnemo=supi_sat + BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21 //.mnemo=sdowni_sat +}; + +enum BrigSamplerAddressing { + + //.mnemo={ s/^BRIG_ADDRESSING_//;lc } + //.mnemo_token=ESamplerAddressingMode + + BRIG_ADDRESSING_UNDEFINED = 0, + BRIG_ADDRESSING_CLAMP_TO_EDGE = 1, + BRIG_ADDRESSING_CLAMP_TO_BORDER = 2, + BRIG_ADDRESSING_REPEAT = 3, + BRIG_ADDRESSING_MIRRORED_REPEAT = 4, + + BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigSamplerCoordNormalization { + + //.mnemo={ s/^BRIG_COORD_//;lc } + //.mnemo_token=ESamplerCoord + // + //.print=$mnemo{ $mnemo } + + BRIG_COORD_UNNORMALIZED = 0, + BRIG_COORD_NORMALIZED = 1 +}; + +enum BrigSamplerFilter { + + //.mnemo={ s/^BRIG_FILTER_//;lc } + // + //.print=$mnemo{ $mnemo } + + BRIG_FILTER_NEAREST = 0, + BRIG_FILTER_LINEAR = 1, + + BRIG_FILTER_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigSamplerQuery { + + //.mnemo={ s/^BRIG_SAMPLER_QUERY_//;lc } + //.mnemo_token=_EMSamplerQuery + // + //.print=$mnemo{ $mnemo } + + BRIG_SAMPLER_QUERY_ADDRESSING = 0, + BRIG_SAMPLER_QUERY_COORD = 1, + BRIG_SAMPLER_QUERY_FILTER = 2 +}; + +enum BrigSectionIndex { + + //.nollvm + // + //.mnemo={ s/^BRIG_SECTION_INDEX_/HSA_/;lc } + + BRIG_SECTION_INDEX_DATA = 0, + BRIG_SECTION_INDEX_CODE = 1, + BRIG_SECTION_INDEX_OPERAND = 2, + BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3, + + // used internally + BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED //.skip +}; + +enum BrigSegCvtModifierMask { + BRIG_SEG_CVT_NONULL = 1 //.mnemo="nonull" //.print="_nonull" +}; + +enum BrigSegment { + + //.mnemo={ s/^BRIG_SEGMENT_//;lc} + //.mnemo_token=_EMSegment + //.mnemo_context=EInstModifierContext + // + //.print=$mnemo{ $mnemo ? 
"_$mnemo" : "" } + + BRIG_SEGMENT_NONE = 0, //.mnemo="" + BRIG_SEGMENT_FLAT = 1, //.mnemo="" + BRIG_SEGMENT_GLOBAL = 2, + BRIG_SEGMENT_READONLY = 3, + BRIG_SEGMENT_KERNARG = 4, + BRIG_SEGMENT_GROUP = 5, + BRIG_SEGMENT_PRIVATE = 6, + BRIG_SEGMENT_SPILL = 7, + BRIG_SEGMENT_ARG = 8, + + BRIG_SEGMENT_FIRST_USER_DEFINED = 128, //.skip + + BRIG_SEGMENT_AMD_GCN = 9, //.mnemo="region" +}; + +enum BrigPackedTypeBits { + + //.nodump + // + //.nollvm + + BRIG_TYPE_BASE_SIZE = 5, + BRIG_TYPE_PACK_SIZE = 2, + BRIG_TYPE_ARRAY_SIZE = 1, + + BRIG_TYPE_BASE_SHIFT = 0, + BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE, + BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE, + + BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) << BRIG_TYPE_BASE_SHIFT, + BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT, + + BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT, + + BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT +}; + +enum BrigType { + + //.numBits={ /ARRAY$/ ? undef : /([0-9]+)X([0-9]+)/ ? $1*$2 : /([0-9]+)/ ? $1 : undef } + //.numBits_switch //.numBits_proto="unsigned getBrigTypeNumBits(unsigned arg)" //.numBits_default="assert(0); return 0" + //.numBytes=$numBits{ $numBits > 1 ? $numBits/8 : undef } + //.numBytes_switch //.numBytes_proto="unsigned getBrigTypeNumBytes(unsigned arg)" //.numBytes_default="assert(0); return 0" + // + //.mnemo={ s/^BRIG_TYPE_//;lc } + //.mnemo_token=_EMType + // + //.array={/ARRAY$/?"true":"false"} + //.array_switch //.array_proto="bool isArrayType(unsigned type)" //.array_arg="type" + //.array_default="return false" + // + //.a2e={/(.*)_ARRAY$/? $1 : "BRIG_TYPE_NONE"} + //.a2e_switch //.a2e_proto="unsigned arrayType2elementType(unsigned type)" //.a2e_arg="type" + //.a2e_default="return BRIG_TYPE_NONE" + // + //.e2a={/_ARRAY$/? "BRIG_TYPE_NONE" : /_NONE$/ ? "BRIG_TYPE_NONE" : /_B1$/ ? "BRIG_TYPE_NONE" : $_ . "_ARRAY"} + //.e2a_switch //.e2a_proto="unsigned elementType2arrayType(unsigned type)" //.e2a_arg="type" + //.e2a_default="return BRIG_TYPE_NONE" + // + //.t2s={s/^BRIG_TYPE_//;lc s/_ARRAY$/[]/;lc} + //.t2s_switch //.t2s_proto="const char* type2name(unsigned type)" //.t2s_arg="type" + //.t2s_default="return NULL" + // + //.dispatch_switch //.dispatch_incfile=TemplateUtilities + //.dispatch_proto="template\nRetType dispatchByType_gen(unsigned type, Visitor& v)" + //.dispatch={ /ARRAY$/ ? "v.visitNone(type)" : /^BRIG_TYPE_([BUSF]|SIG)[0-9]+/ ? 
"v.template visit< BrigTypeTraits<$_> >()" : "v.visitNone(type)" } + //.dispatch_arg="type" //.dispatch_default="return v.visitNone(type)" + // + //- .tdname=BrigType + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_TYPE_NONE = 0, //.mnemo="" //.print="" + BRIG_TYPE_U8 = 1, //.ctype=uint8_t + BRIG_TYPE_U16 = 2, //.ctype=uint16_t + BRIG_TYPE_U32 = 3, //.ctype=uint32_t + BRIG_TYPE_U64 = 4, //.ctype=uint64_t + BRIG_TYPE_S8 = 5, //.ctype=int8_t + BRIG_TYPE_S16 = 6, //.ctype=int16_t + BRIG_TYPE_S32 = 7, //.ctype=int32_t + BRIG_TYPE_S64 = 8, //.ctype=int64_t + BRIG_TYPE_F16 = 9, //.ctype=f16_t + BRIG_TYPE_F32 = 10, //.ctype=float + BRIG_TYPE_F64 = 11, //.ctype=double + BRIG_TYPE_B1 = 12, //.ctype=bool //.numBytes=1 + BRIG_TYPE_B8 = 13, //.ctype=uint8_t + BRIG_TYPE_B16 = 14, //.ctype=uint16_t + BRIG_TYPE_B32 = 15, //.ctype=uint32_t + BRIG_TYPE_B64 = 16, //.ctype=uint64_t + BRIG_TYPE_B128 = 17, //.ctype=b128_t + BRIG_TYPE_SAMP = 18, //.mnemo=samp //.numBits=64 + BRIG_TYPE_ROIMG = 19, //.mnemo=roimg //.numBits=64 + BRIG_TYPE_WOIMG = 20, //.mnemo=woimg //.numBits=64 + BRIG_TYPE_RWIMG = 21, //.mnemo=rwimg //.numBits=64 + BRIG_TYPE_SIG32 = 22, //.mnemo=sig32 //.numBits=64 + BRIG_TYPE_SIG64 = 23, //.mnemo=sig64 //.numBits=64 + + BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32, //.ctype=uint8_t + BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64, //.ctype=uint8_t + BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128, //.ctype=uint8_t + BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32, //.ctype=uint16_t + BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64, //.ctype=uint16_t + BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128, //.ctype=uint16_t + BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64, //.ctype=uint32_t + BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128, //.ctype=uint32_t + BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128, //.ctype=uint64_t + BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32, //.ctype=int8_t + BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64, //.ctype=int8_t + BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128, //.ctype=int8_t + BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32, //.ctype=int16_t + BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64, //.ctype=int16_t + BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128, //.ctype=int16_t + BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64, //.ctype=int32_t + BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128, //.ctype=int32_t + BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128, //.ctype=int64_t + BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32, //.ctype=f16_t + BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64, //.ctype=f16_t + BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128, //.ctype=f16_t + BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64, //.ctype=float + BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128, //.ctype=float + BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128, //.ctype=double + + BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 
| BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + + // Used internally + BRIG_TYPE_INVALID = (unsigned) -1 //.skip +}; + +enum BrigVariableModifierMask { + + //.nodump + + BRIG_VARIABLE_DEFINITION = 1, + BRIG_VARIABLE_CONST = 2 +}; + +enum BrigWidth { + + //.tddef=1 + // + //.print={ s/^BRIG_WIDTH_//; "_width($_)" } + + BRIG_WIDTH_NONE = 0, + BRIG_WIDTH_1 = 1, + 
BRIG_WIDTH_2 = 2, + BRIG_WIDTH_4 = 3, + BRIG_WIDTH_8 = 4, + BRIG_WIDTH_16 = 5, + BRIG_WIDTH_32 = 6, + BRIG_WIDTH_64 = 7, + BRIG_WIDTH_128 = 8, + BRIG_WIDTH_256 = 9, + BRIG_WIDTH_512 = 10, + BRIG_WIDTH_1024 = 11, + BRIG_WIDTH_2048 = 12, + BRIG_WIDTH_4096 = 13, + BRIG_WIDTH_8192 = 14, + BRIG_WIDTH_16384 = 15, + BRIG_WIDTH_32768 = 16, + BRIG_WIDTH_65536 = 17, + BRIG_WIDTH_131072 = 18, + BRIG_WIDTH_262144 = 19, + BRIG_WIDTH_524288 = 20, + BRIG_WIDTH_1048576 = 21, + BRIG_WIDTH_2097152 = 22, + BRIG_WIDTH_4194304 = 23, + BRIG_WIDTH_8388608 = 24, + BRIG_WIDTH_16777216 = 25, + BRIG_WIDTH_33554432 = 26, + BRIG_WIDTH_67108864 = 27, + BRIG_WIDTH_134217728 = 28, + BRIG_WIDTH_268435456 = 29, + BRIG_WIDTH_536870912 = 30, + BRIG_WIDTH_1073741824 = 31, + BRIG_WIDTH_2147483648 = 32, + BRIG_WIDTH_WAVESIZE = 33, + BRIG_WIDTH_ALL = 34, + + BRIG_WIDTH_LAST //.skip +}; + +enum BrigExceptionsMask { + BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0, + BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1, + BRIG_EXCEPTIONS_OVERFLOW = 1 << 2, + BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3, + BRIG_EXCEPTIONS_INEXACT = 1 << 4, + + BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16 +}; +#endif + +#endif Index: lib/Target/HSAIL/HSAILComparisons.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILComparisons.td @@ -0,0 +1,9 @@ + +defm CMP : InstCmp_RetTypes<"cmp", BrigOpcode.CMP>; + +//////////////////////////////////////////////////////////////////////////////// +// floating-point classify instructions + +defm CLASS : InstSourceType_2Op_Class_Types<"class", BrigOpcode.CLASS>; +def : InstSourceType_Class_Pat; +def : InstSourceType_Class_Pat; Index: lib/Target/HSAIL/HSAILCompilerErrors.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILCompilerErrors.h @@ -0,0 +1,78 @@ +//===-- HSAILCompilerErrors.h -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_COMPILER_ERRORS_H_ +#define _HSAIL_COMPILER_ERRORS_H_ +// Compiler errors generated by the backend that will cause +// the runtime to abort compilation. These are mainly for +// device constraint violations or invalid code. +namespace hsa { + +#define INVALID_COMPUTE 0 +#define GENERIC_ERROR 1 +#define INTERNAL_ERROR 2 +#define MISSING_FUNCTION_CALL 3 +#define RESERVED_FUNCTION 4 +#define BYTE_STORE_ERROR 5 +#define UNKNOWN_TYPE_NAME 6 +#define NO_IMAGE_SUPPORT 7 +#define NO_ATOMIC_32 8 +#define NO_ATOMIC_64 9 +#define IRREDUCIBLE_CF 10 +#define INSUFFICIENT_RESOURCES 11 +#define INSUFFICIENT_LOCAL_RESOURCES 12 +#define INSUFFICIENT_PRIVATE_RESOURCES 13 +#define INSUFFICIENT_IMAGE_RESOURCES 14 +#define DOUBLE_NOT_SUPPORTED 15 +#define INVALID_CONSTANT_WRITE 16 +#define INSUFFICIENT_CONSTANT_RESOURCES 17 +#define INSUFFICIENT_COUNTER_RESOURCES 18 +#define INSUFFICIENT_REGION_RESOURCES 19 +#define REGION_MEMORY_ERROR 20 +#define MEMOP_NO_ALLOCATION 21 +#define RECURSIVE_FUNCTION 22 +#define INCORRECT_COUNTER_USAGE 23 +#define INVALID_INTRINSIC_USAGE 24 +#define INSUFFICIENT_SEMAPHORE_RESOURCES 25 +#define NO_SEMAPHORE_SUPPORT 26 +#define INVALID_INIT_VALUE 27 +#define NUM_ERROR_MESSAGES 28 + +static const char *CompilerErrorMessage[NUM_ERROR_MESSAGES] = { + "E000:Compute Shader Not Supported! ", + "E001:Generic Compiler Error Message! 
", + "E002:Internal Compiler Error Message!", + "E003:Missing Function Call Detected! ", + "E004:Reserved Function Call Detected!", + "E005:Byte Addressable Stores Invalid!", + "E006:Kernel Arg Type Name Is Invalid!", + "E007:Image 1.0 Extension Unsupported!", + "E008:32bit Atomic Op are Unsupported!", + "E009:64bit Atomic Op are Unsupported!", + "E010:Irreducible ControlFlow Detected", + "E011:Insufficient Resources Detected!", + "E012:Insufficient Local Resources! ", + "E013:Insufficient Private Resources! ", + "E014:Images not currently supported! ", + "E015:Double precision not supported! ", + "E016:Invalid Constant Memory Write! ", + "E017:Max number Constant Ptr reached!", + "E018:Max number of Counters reached! ", + "E019:Insufficient Region Resources! ", + "E020:Region address space invalid! ", + "E021:MemOp with no memory allocated! ", + "E022:Recursive Function detected! ", + "E023:Illegal Inc+Dec to same counter!", + "E024:Illegal usage of intrinsic inst!", + "E025:Insufficient Semaphore Resources", + "E026:Semaphores not supported! ", + "E027:Semaphore init value is invalid!"}; +} + +#endif // _HSAIL_COMPILER_ERRORS_H_ Index: lib/Target/HSAIL/HSAILControlFlow.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILControlFlow.td @@ -0,0 +1,77 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// + +let isNotDuplicable = 1, isCall = 1 in { + // No semicolon because we need to specially handle the printing of + // the variable_ops. + def CALL : HSAILInst<(outs), + (ins calltarget:$dest, variable_ops), + "call\t$dest ", [], + 0, + 0 + >; +} + +//////////////////////////////////////////////////////////////////////////////// + +let isNotDuplicable = 1, hasSideEffects = 1 in { + def ARG_SCOPE_START : HSAILInst< + (outs), + (ins i32imm:$src0), + "\\{", + [(IL_callseq_start timm:$src0)], + 0, + 0 + >; + + def ARG_SCOPE_END : HSAILInst< + (outs), + (ins i32imm:$src0, i32imm:$src1), + "\\}", + [(IL_callseq_end timm:$src0, timm:$src1)], + 0, + 0 + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// ret + +let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, + isNotDuplicable = 1, hasSideEffects = 1 in { + def RET : HSAILInstBasic_0Op_NoRet<"ret", BrigOpcode.RET, Inst_Void>; +} + +def : InstBasic_0Op_NoRet_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// branch (unconditional and conditional) + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + let isBarrier = 1, WidthAttr = WidthAttrValues.ALL in { + def BR : HSAILInstBr_1Op_NoRet<"br", BrigOpcode.BR>; + } + + def CBR : HSAILInstBr_2Op_NoRet<"cbr", BrigOpcode.CBR>; + + // TODO: can sbr be used to support brind? 
+} + +def : Pat< + (br bb:$src0), + (BR BrigWidth._ALL, bb:$src0, BrigType.NONE) +>; + +def : Pat< + (brcond i1:$src0, bb:$src1), + (CBR BrigWidth._1, $src0, bb:$src1, BrigType.B1) +>; Index: lib/Target/HSAIL/HSAILConversions.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILConversions.td @@ -0,0 +1,335 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// conversion routines + + +class CvtPat : Pat< + (destTy.VT (node srcTy.VT:$src)), + (!cast("CVT"#destTy.InstName#srcTy.InstName) ftz, roundmode, destTy.BT, srcTy.BT, $src) +>; + +class CvtSInt32RoundPat : Pat< + (i32 (fp_to_sint (roundop srcTy.VT:$src))), + (!cast("CVT_S32"#srcTy.InstName) ftz, roundmode, BrigType.S32, srcTy.BT, $src) +>; + +let isConv = 1 in { + defm RINT : InstMod_1Op_FPTypes<"rint", BrigOpcode.RINT>; + defm FLOOR : InstMod_1Op_FPTypes<"floor", BrigOpcode.FLOOR>; + defm CEIL : InstMod_1Op_FPTypes<"ceil", BrigOpcode.CEIL>; + defm TRUNC : InstMod_1Op_FPTypes<"trunc", BrigOpcode.TRUNC>; +} + + +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; +//def : CvtPat; // FIXME +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; +//def : CvtPat; // FIXME +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// ftz conversion complies with our OpenCL contract wrt f64 denorms, +// because f32->f64 would not yield f64 denorms, so whether f64 is +// flushed or not does not really matter.
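A quick sanity check of the claim in the comment above, as a standalone C++ snippet (illustration only, separate from the patch itself; it uses only the standard library): the smallest positive f32 is still a normal value once widened to f64, so an ftz modifier on an f32->f64 convert has nothing to flush.

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  // Smallest positive f32 value (a denormal, ~1.4e-45).
  float tiny = std::numeric_limits<float>::denorm_min();
  double widened = static_cast<double>(tiny);
  // Every finite nonzero f32 widens to a *normal* f64, since even 1.4e-45 is
  // far above the largest f64 denormal (~2.2e-308). Flushing f64 denormals on
  // the result of an f32->f64 convert therefore changes nothing.
  assert(std::isnormal(widened));
  return 0;
}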
+def : CvtPat; +def : CvtPat; + +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; + +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; + + +//////////////////////////////////////////////////////////////////////////////// +// support for explicit conversions + +// float to int +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// float to uint +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + + +// float to long +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// float to ulong +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + + + +// double to int +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// double to uint +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + + +// double to long +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// double to ulong +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// half to float +def : CvtPat; + +// float to half +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// double to half +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// int to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// uint to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// long to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// ulong to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// long to double +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// ulong to double +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// double to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; Index: lib/Target/HSAIL/HSAILELFTargetObjectFile.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILELFTargetObjectFile.h @@ -0,0 +1,59 @@ +//===-- HSAILELFObjectFile.h - HSAIL ELF Object Info ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_ELF_OBJECT_FILE_H_ +#define _HSAIL_ELF_OBJECT_FILE_H_ + +#include "HSAILSection.h" + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { + +class HSAILTargetObjectFile : public TargetLoweringObjectFileELF { +public: + HSAILTargetObjectFile(); + void Initialize(MCContext &ctx, const TargetMachine &TM) override; + + const MCSection *getSectionForConstant(SectionKind Kind, + const Constant *C) const override { + return ReadOnlySection; + } + + const MCSection * + getExplicitSectionGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const override { + return DataSection; + } + + const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const override { + return getDataSection(); + } +}; + +// FIXME: Do we really need both of these? 
+class BRIG32_DwarfTargetObjectFile : public TargetLoweringObjectFileELF { +public: + BRIG32_DwarfTargetObjectFile(){}; + virtual ~BRIG32_DwarfTargetObjectFile(); +}; + +class BRIG64_DwarfTargetObjectFile : public TargetLoweringObjectFileELF { +public: + BRIG64_DwarfTargetObjectFile(){}; + virtual ~BRIG64_DwarfTargetObjectFile(); +}; + +} // end namespace llvm + +#endif // _HSAIL_ELF_OBJECT_FILE_H_ Index: lib/Target/HSAIL/HSAILELFTargetObjectFile.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILELFTargetObjectFile.cpp @@ -0,0 +1,84 @@ +//===-- HSAILELFTargetObjectFile.cpp ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILELFTargetObjectFile.h" + +using namespace llvm; + + +HSAILTargetObjectFile::HSAILTargetObjectFile() { + TextSection = nullptr; + DataSection = nullptr; + BSSSection = nullptr; + ReadOnlySection = nullptr; + + StaticCtorSection = nullptr; + StaticDtorSection = nullptr; + LSDASection = nullptr; + EHFrameSection = nullptr; + DwarfAbbrevSection = nullptr; + DwarfInfoSection = nullptr; + DwarfLineSection = nullptr; + DwarfFrameSection = nullptr; + DwarfPubTypesSection = nullptr; + DwarfDebugInlineSection = nullptr; + DwarfStrSection = nullptr; + DwarfLocSection = nullptr; + DwarfARangesSection = nullptr; + DwarfRangesSection = nullptr; +} + +void HSAILTargetObjectFile::Initialize(MCContext &ctx, + const TargetMachine &TM) { + TargetLoweringObjectFile::Initialize(ctx, TM); + + TextSection = new HSAILSection(MCSection::SV_ELF, SectionKind::getText()); + DataSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getDataRel()); + BSSSection = new HSAILSection(MCSection::SV_ELF, SectionKind::getBSS()); + ReadOnlySection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getReadOnly()); + + StaticCtorSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + StaticDtorSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + LSDASection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + EHFrameSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfAbbrevSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfInfoSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLineSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfFrameSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfPubTypesSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfDebugInlineSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfStrSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLocSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfARangesSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfRangesSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); +} + +BRIG32_DwarfTargetObjectFile::~BRIG32_DwarfTargetObjectFile() { + +} + +BRIG64_DwarfTargetObjectFile::~BRIG64_DwarfTargetObjectFile() { + +} Index: lib/Target/HSAIL/HSAILEnums.td =================================================================== --- 
/dev/null +++ lib/Target/HSAIL/HSAILEnums.td @@ -0,0 +1,662 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +let OperandType = "OPERAND_IMMEDIATE" in { + +def BrigAlignment : Operand { + let PrintMethod = "printBrigAlignment"; + int _NONE = 0; + int _1 = 1; + int _2 = 2; + int _4 = 3; + int _8 = 4; + int _16 = 5; + int _32 = 6; + int _64 = 7; + int _128 = 8; + int _256 = 9; + int _LAST = 10; + int _MAX = 9; +} + +def BrigAllocation : Operand { + let PrintMethod = "printBrigAllocation"; + int NONE = 0; + int PROGRAM = 1; + int AGENT = 2; + int AUTOMATIC = 3; +} + +def BrigAluModifierMask : Operand { + let PrintMethod = "printBrigAluModifierMask"; + int FTZ = 1; +} + +def BrigAtomicOperation : Operand { + let PrintMethod = "printBrigAtomicOperation"; + int ADD = 0; + int AND = 1; + int CAS = 2; + int EXCH = 3; + int LD = 4; + int MAX = 5; + int MIN = 6; + int OR = 7; + int ST = 8; + int SUB = 9; + int WRAPDEC = 10; + int WRAPINC = 11; + int XOR = 12; + int WAIT_EQ = 13; + int WAIT_NE = 14; + int WAIT_LT = 15; + int WAIT_GTE = 16; + int WAITTIMEOUT_EQ = 17; + int WAITTIMEOUT_NE = 18; + int WAITTIMEOUT_LT = 19; + int WAITTIMEOUT_GTE = 20; +} + +def BrigCompareOperation : Operand { + let PrintMethod = "printBrigCompareOperation"; + int EQ = 0; + int NE = 1; + int LT = 2; + int LE = 3; + int GT = 4; + int GE = 5; + int EQU = 6; + int NEU = 7; + int LTU = 8; + int LEU = 9; + int GTU = 10; + int GEU = 11; + int NUM = 12; + int NAN = 13; + int SEQ = 14; + int SNE = 15; + int SLT = 16; + int SLE = 17; + int SGT = 18; + int SGE = 19; + int SGEU = 20; + int SEQU = 21; + int SNEU = 22; + int SLTU = 23; + int SLEU = 24; + int SNUM = 25; + int SNAN = 26; + int SGTU = 27; +} + +def BrigControlDirective : Operand { + let PrintMethod = "printBrigControlDirective"; + int NONE = 0; + int ENABLEBREAKEXCEPTIONS = 1; + int ENABLEDETECTEXCEPTIONS = 2; + int MAXDYNAMICGROUPSIZE = 3; + int MAXFLATGRIDSIZE = 4; + int MAXFLATWORKGROUPSIZE = 5; + int REQUIREDDIM = 6; + int REQUIREDGRIDSIZE = 7; + int REQUIREDWORKGROUPSIZE = 8; + int REQUIRENOPARTIALWORKGROUPS = 9; +} + +def BrigExecutableModifierMask : Operand { + let PrintMethod = "printBrigExecutableModifierMask"; + int DEFINITION = 1; +} + +def BrigImageChannelOrder : Operand { + let PrintMethod = "printBrigImageChannelOrder"; + int A = 0; + int R = 1; + int RX = 2; + int RG = 3; + int RGX = 4; + int RA = 5; + int RGB = 6; + int RGBX = 7; + int RGBA = 8; + int BGRA = 9; + int ARGB = 10; + int ABGR = 11; + int SRGB = 12; + int SRGBX = 13; + int SRGBA = 14; + int SBGRA = 15; + int INTENSITY = 16; + int LUMINANCE = 17; + int DEPTH = 18; + int DEPTH_STENCIL = 19; + int UNKNOWN = 20; + int FIRST_USER_DEFINED = 128; +} + +def BrigImageChannelType : Operand { + let PrintMethod = "printBrigImageChannelType"; + int SNORM_INT8 = 0; + int SNORM_INT16 = 1; + int UNORM_INT8 = 2; + int UNORM_INT16 = 3; + int UNORM_INT24 = 4; + int UNORM_SHORT_555 = 5; + int UNORM_SHORT_565 = 6; + int UNORM_INT_101010 = 7; + int SIGNED_INT8 = 8; + int SIGNED_INT16 = 9; + int SIGNED_INT32 = 10; + int UNSIGNED_INT8 = 11; + int UNSIGNED_INT16 = 12; + int UNSIGNED_INT32 = 13; + int HALF_FLOAT = 14; + int FLOAT = 15; + int UNKNOWN = 16; + int FIRST_USER_DEFINED = 128; +} + +def BrigImageGeometry : Operand { + let 
PrintMethod = "printBrigImageGeometry"; + int _1D = 0; + int _2D = 1; + int _3D = 2; + int _1DA = 3; + int _2DA = 4; + int _1DB = 5; + int _2DDEPTH = 6; + int _2DADEPTH = 7; + int _UNKNOWN = 8; + int _FIRST_USER_DEFINED = 128; +} + +def BrigImageQuery : Operand { + let PrintMethod = "printBrigImageQuery"; + int WIDTH = 0; + int HEIGHT = 1; + int DEPTH = 2; + int ARRAY = 3; + int CHANNELORDER = 4; + int CHANNELTYPE = 5; +} + +def BrigLinkage : Operand { + let PrintMethod = "printBrigLinkage"; + int NONE = 0; + int PROGRAM = 1; + int MODULE = 2; + int FUNCTION = 3; + int ARG = 4; +} + +def BrigMachineModel : Operand { + let PrintMethod = "printBrigMachineModel"; + int SMALL = 0; + int LARGE = 1; + int UNDEF = 2; +} + +def BrigMemoryModifierMask : Operand { + let PrintMethod = "printBrigMemoryModifierMask"; + int CONST = 1; +} + +def BrigMemoryOrder : Operand { + let PrintMethod = "printBrigMemoryOrder"; + int NONE = 0; + int RELAXED = 1; + int SC_ACQUIRE = 2; + int SC_RELEASE = 3; + int SC_ACQUIRE_RELEASE = 4; + int LAST = 5; +} + +def BrigMemoryScope : Operand { + let PrintMethod = "printBrigMemoryScope"; + int NONE = 0; + int WORKITEM = 1; + int WAVEFRONT = 2; + int WORKGROUP = 3; + int AGENT = 4; + int SYSTEM = 5; + int LAST = 6; +} + +def BrigOpcode : Operand { + let PrintMethod = "printBrigOpcode"; + int NOP = 0; + int ABS = 1; + int ADD = 2; + int BORROW = 3; + int CARRY = 4; + int CEIL = 5; + int COPYSIGN = 6; + int DIV = 7; + int FLOOR = 8; + int FMA = 9; + int FRACT = 10; + int MAD = 11; + int MAX = 12; + int MIN = 13; + int MUL = 14; + int MULHI = 15; + int NEG = 16; + int REM = 17; + int RINT = 18; + int SQRT = 19; + int SUB = 20; + int TRUNC = 21; + int MAD24 = 22; + int MAD24HI = 23; + int MUL24 = 24; + int MUL24HI = 25; + int SHL = 26; + int SHR = 27; + int AND = 28; + int NOT = 29; + int OR = 30; + int POPCOUNT = 31; + int XOR = 32; + int BITEXTRACT = 33; + int BITINSERT = 34; + int BITMASK = 35; + int BITREV = 36; + int BITSELECT = 37; + int FIRSTBIT = 38; + int LASTBIT = 39; + int COMBINE = 40; + int EXPAND = 41; + int LDA = 42; + int MOV = 43; + int SHUFFLE = 44; + int UNPACKHI = 45; + int UNPACKLO = 46; + int PACK = 47; + int UNPACK = 48; + int CMOV = 49; + int CLASS = 50; + int NCOS = 51; + int NEXP2 = 52; + int NFMA = 53; + int NLOG2 = 54; + int NRCP = 55; + int NRSQRT = 56; + int NSIN = 57; + int NSQRT = 58; + int BITALIGN = 59; + int BYTEALIGN = 60; + int PACKCVT = 61; + int UNPACKCVT = 62; + int LERP = 63; + int SAD = 64; + int SADHI = 65; + int SEGMENTP = 66; + int FTOS = 67; + int STOF = 68; + int CMP = 69; + int CVT = 70; + int LD = 71; + int ST = 72; + int ATOMIC = 73; + int ATOMICNORET = 74; + int SIGNAL = 75; + int SIGNALNORET = 76; + int MEMFENCE = 77; + int RDIMAGE = 78; + int LDIMAGE = 79; + int STIMAGE = 80; + int IMAGEFENCE = 81; + int QUERYIMAGE = 82; + int QUERYSAMPLER = 83; + int CBR = 84; + int BR = 85; + int SBR = 86; + int BARRIER = 87; + int WAVEBARRIER = 88; + int ARRIVEFBAR = 89; + int INITFBAR = 90; + int JOINFBAR = 91; + int LEAVEFBAR = 92; + int RELEASEFBAR = 93; + int WAITFBAR = 94; + int LDF = 95; + int ACTIVELANECOUNT = 96; + int ACTIVELANEID = 97; + int ACTIVELANEMASK = 98; + int ACTIVELANEPERMUTE = 99; + int CALL = 100; + int SCALL = 101; + int ICALL = 102; + int RET = 103; + int ALLOCA = 104; + int CURRENTWORKGROUPSIZE = 105; + int CURRENTWORKITEMFLATID = 106; + int DIM = 107; + int GRIDGROUPS = 108; + int GRIDSIZE = 109; + int PACKETCOMPLETIONSIG = 110; + int PACKETID = 111; + int WORKGROUPID = 112; + int WORKGROUPSIZE = 113; + int 
WORKITEMABSID = 114; + int WORKITEMFLATABSID = 115; + int WORKITEMFLATID = 116; + int WORKITEMID = 117; + int CLEARDETECTEXCEPT = 118; + int GETDETECTEXCEPT = 119; + int SETDETECTEXCEPT = 120; + int ADDQUEUEWRITEINDEX = 121; + int CASQUEUEWRITEINDEX = 122; + int LDQUEUEREADINDEX = 123; + int LDQUEUEWRITEINDEX = 124; + int STQUEUEREADINDEX = 125; + int STQUEUEWRITEINDEX = 126; + int CLOCK = 127; + int CUID = 128; + int DEBUGTRAP = 129; + int GROUPBASEPTR = 130; + int KERNARGBASEPTR = 131; + int LANEID = 132; + int MAXCUID = 133; + int MAXWAVEID = 134; + int NULLPTR = 135; + int WAVEID = 136; + int FIRST_USER_DEFINED = 32768; + int GCNMADU = 32768; + int GCNMADS = 32769; + int GCNMAX3 = 32770; + int GCNMIN3 = 32771; + int GCNMED3 = 32772; + int GCNFLDEXP = 32773; + int GCNFREXP_EXP = 32774; + int GCNFREXP_MANT = 32775; + int GCNTRIG_PREOP = 32776; + int GCNBFM = 32777; + int GCNLD = 32778; + int GCNST = 32779; + int GCNATOMIC = 32780; + int GCNATOMICNORET = 32781; + int GCNSLEEP = 32782; + int GCNPRIORITY = 32783; + int GCNREGIONALLOC = 32784; + int GCNMSAD = 32785; + int GCNQSAD = 32786; + int GCNMQSAD = 32787; + int GCNMQSAD4 = 32788; + int GCNSADW = 32789; + int GCNSADD = 32790; + int GCNCONSUME = 32791; + int GCNAPPEND = 32792; + int GCNB4XCHG = 32793; + int GCNB32XCHG = 32794; + int GCNMAX = 32795; + int GCNMIN = 32796; + int GCNDIVRELAXED = 32797; + int GCNDIVRELAXEDNARROW = 32798; +} + +def BrigPack : Operand { + let PrintMethod = "printBrigPack"; + int NONE = 0; + int PP = 1; + int PS = 2; + int SP = 3; + int SS = 4; + int S = 5; + int P = 6; + int PPSAT = 7; + int PSSAT = 8; + int SPSAT = 9; + int SSSAT = 10; + int SSAT = 11; + int PSAT = 12; +} + +def BrigProfile : Operand { + let PrintMethod = "printBrigProfile"; + int BASE = 0; + int FULL = 1; + int UNDEF = 2; +} + +def BrigRound : Operand { + let PrintMethod = "printBrigRound"; + int NONE = 0; + int FLOAT_DEFAULT = 1; + int FLOAT_NEAR_EVEN = 2; + int FLOAT_ZERO = 3; + int FLOAT_PLUS_INFINITY = 4; + int FLOAT_MINUS_INFINITY = 5; + int INTEGER_NEAR_EVEN = 6; + int INTEGER_ZERO = 7; + int INTEGER_PLUS_INFINITY = 8; + int INTEGER_MINUS_INFINITY = 9; + int INTEGER_NEAR_EVEN_SAT = 10; + int INTEGER_ZERO_SAT = 11; + int INTEGER_PLUS_INFINITY_SAT = 12; + int INTEGER_MINUS_INFINITY_SAT = 13; + int INTEGER_SIGNALING_NEAR_EVEN = 14; + int INTEGER_SIGNALING_ZERO = 15; + int INTEGER_SIGNALING_PLUS_INFINITY = 16; + int INTEGER_SIGNALING_MINUS_INFINITY = 17; + int INTEGER_SIGNALING_NEAR_EVEN_SAT = 18; + int INTEGER_SIGNALING_ZERO_SAT = 19; + int INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20; + int INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21; +} + +def BrigSamplerAddressing : Operand { + let PrintMethod = "printBrigSamplerAddressing"; + int UNDEFINED = 0; + int CLAMP_TO_EDGE = 1; + int CLAMP_TO_BORDER = 2; + int REPEAT = 3; + int MIRRORED_REPEAT = 4; + int FIRST_USER_DEFINED = 128; +} + +def BrigSamplerCoordNormalization : Operand { + let PrintMethod = "printBrigSamplerCoordNormalization"; + int UNNORMALIZED = 0; + int NORMALIZED = 1; +} + +def BrigSamplerFilter : Operand { + let PrintMethod = "printBrigSamplerFilter"; + int NEAREST = 0; + int LINEAR = 1; + int FIRST_USER_DEFINED = 128; +} + +def BrigSamplerQuery : Operand { + let PrintMethod = "printBrigSamplerQuery"; + int ADDRESSING = 0; + int COORD = 1; + int FILTER = 2; +} + +def BrigSegCvtModifierMask : Operand { + let PrintMethod = "printBrigSegCvtModifierMask"; + int NONULL = 1; +} + +def BrigSegment : Operand { + let PrintMethod = "printBrigSegment"; + int NONE = 0; + int FLAT = 1; + int 
GLOBAL = 2; + int READONLY = 3; + int KERNARG = 4; + int GROUP = 5; + int PRIVATE = 6; + int SPILL = 7; + int ARG = 8; + int FIRST_USER_DEFINED = 128; + int AMD_GCN = 9; +} + +def AddressSpace : Operand { + let PrintMethod = "printBrigSegment"; + int PRIVATE = 0; + int GLOBAL = 1; + int READONLY = 2; + int GROUP = 3; + int FLAT = 4; + int REGION = 5; + int SPILL = 6; + int KERNARG = 7; + int ARG = 8; +} + +def BrigType : Operand { + let PrintMethod = "printBrigType"; + int NONE = 0; + int U8 = 1; + int U16 = 2; + int U32 = 3; + int U64 = 4; + int S8 = 5; + int S16 = 6; + int S32 = 7; + int S64 = 8; + int F16 = 9; + int F32 = 10; + int F64 = 11; + int B1 = 12; + int B8 = 13; + int B16 = 14; + int B32 = 15; + int B64 = 16; + int B128 = 17; + int SAMP = 18; + int ROIMG = 19; + int WOIMG = 20; + int RWIMG = 21; + int SIG32 = 22; + int SIG64 = 23; + int U8X4 = 33; + int U8X8 = 65; + int U8X16 = 97; + int U16X2 = 34; + int U16X4 = 66; + int U16X8 = 98; + int U32X2 = 67; + int U32X4 = 99; + int U64X2 = 100; + int S8X4 = 37; + int S8X8 = 69; + int S8X16 = 101; + int S16X2 = 38; + int S16X4 = 70; + int S16X8 = 102; + int S32X2 = 71; + int S32X4 = 103; + int S64X2 = 104; + int F16X2 = 41; + int F16X4 = 73; + int F16X8 = 105; + int F32X2 = 74; + int F32X4 = 106; + int F64X2 = 107; + int U8_ARRAY = 129; + int U16_ARRAY = 130; + int U32_ARRAY = 131; + int U64_ARRAY = 132; + int S8_ARRAY = 133; + int S16_ARRAY = 134; + int S32_ARRAY = 135; + int S64_ARRAY = 136; + int F16_ARRAY = 137; + int F32_ARRAY = 138; + int F64_ARRAY = 139; + int B8_ARRAY = 141; + int B16_ARRAY = 142; + int B32_ARRAY = 143; + int B64_ARRAY = 144; + int B128_ARRAY = 145; + int SAMP_ARRAY = 146; + int ROIMG_ARRAY = 147; + int WOIMG_ARRAY = 148; + int RWIMG_ARRAY = 149; + int SIG32_ARRAY = 150; + int SIG64_ARRAY = 151; + int U8X4_ARRAY = 161; + int U8X8_ARRAY = 193; + int U8X16_ARRAY = 225; + int U16X2_ARRAY = 162; + int U16X4_ARRAY = 194; + int U16X8_ARRAY = 226; + int U32X2_ARRAY = 195; + int U32X4_ARRAY = 227; + int U64X2_ARRAY = 228; + int S8X4_ARRAY = 165; + int S8X8_ARRAY = 197; + int S8X16_ARRAY = 229; + int S16X2_ARRAY = 166; + int S16X4_ARRAY = 198; + int S16X8_ARRAY = 230; + int S32X2_ARRAY = 199; + int S32X4_ARRAY = 231; + int S64X2_ARRAY = 232; + int F16X2_ARRAY = 169; + int F16X4_ARRAY = 201; + int F16X8_ARRAY = 233; + int F32X2_ARRAY = 202; + int F32X4_ARRAY = 234; + int F64X2_ARRAY = 235; + int INVALID = -1; +} + +def BrigVariableModifierMask : Operand { + let PrintMethod = "printBrigVariableModifierMask"; + int DEFINITION = 1; + int CONST = 2; +} + +def BrigWidth : Operand { + let PrintMethod = "printBrigWidth"; + int _NONE = 0; + int _1 = 1; + int _2 = 2; + int _4 = 3; + int _8 = 4; + int _16 = 5; + int _32 = 6; + int _64 = 7; + int _128 = 8; + int _256 = 9; + int _512 = 10; + int _1024 = 11; + int _2048 = 12; + int _4096 = 13; + int _8192 = 14; + int _16384 = 15; + int _32768 = 16; + int _65536 = 17; + int _131072 = 18; + int _262144 = 19; + int _524288 = 20; + int _1048576 = 21; + int _2097152 = 22; + int _4194304 = 23; + int _8388608 = 24; + int _16777216 = 25; + int _33554432 = 26; + int _67108864 = 27; + int _134217728 = 28; + int _268435456 = 29; + int _536870912 = 30; + int _1073741824 = 31; + int _2147483648 = 32; + int _WAVESIZE = 33; + int _ALL = 34; + int _LAST = 35; +} + +} Index: lib/Target/HSAIL/HSAILFrameLowering.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILFrameLowering.h @@ -0,0 +1,45 @@ +//=-- HSAILTargetFrameLowering.h - Define HSAIL 
frame lowering ---*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements HSAIL-specific bits of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_FRAME_LOWERING_H_ +#define _HSAIL_FRAME_LOWERING_H_ + +#include "llvm/Target/TargetFrameLowering.h" + + +namespace llvm { + +class HSAILFrameLowering : public TargetFrameLowering { +public: + explicit HSAILFrameLowering(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1) + : TargetFrameLowering(D, StackAl, LAO, TransAl) {} + + void emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const override {}; + + void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const override{} + + bool hasFP(const MachineFunction &MF) const override { return false; } + + int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + + void processFunctionBeforeFrameFinalized( + MachineFunction &F, + RegScavenger *RS = nullptr) const override; +}; + +} // End llvm namespace + +#endif // _HSAIL_FRAME_LOWERING_H_ Index: lib/Target/HSAIL/HSAILFrameLowering.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILFrameLowering.cpp @@ -0,0 +1,105 @@ +//===-- HSAILFrameLowering.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILFrameLowering.h" +#include "HSAIL.h" +#include "HSAILInstrInfo.h" +#include "HSAILMachineFunctionInfo.h" + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" + +using namespace llvm; + + +int HSAILFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + return MF.getFrameInfo()->getObjectOffset(FI); +} + +void HSAILFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + DenseMap SpillMap; + DenseMap PrivateMap; + + int64_t SpillSize = 0; + int64_t PrivateSize = 0; + unsigned SpillAlign = 4; + unsigned PrivateAlign = 4; + + for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd(); + I != E; ++I) { + if (MFI->isDeadObjectIndex(I)) + continue; + + unsigned Size = MFI->getObjectSize(I); + unsigned Align = MFI->getObjectAlignment(I); + unsigned Offset = MFI->getObjectOffset(I); + + assert(Offset == 0 && + "Stack object offsets should be 0 before frame finalized"); + + if (MFI->isSpillSlotObjectIndex(I)) { + // Adjust to alignment boundary. 
+ SpillSize = (SpillSize + Align - 1) / Align * Align; + SpillMap[I] = SpillSize; // Offset + + SpillSize += Size; + SpillAlign = std::max(SpillAlign, Align); + } else { + PrivateSize = (PrivateSize + Align - 1) / Align * Align; + PrivateMap[I] = PrivateSize; // Offset + + PrivateSize += Size; + PrivateAlign = std::max(PrivateAlign, Align); + } + + MFI->RemoveStackObject(I); + } + + int PrivateIndex = -1; + int SpillIndex = -1; + + if (PrivateSize != 0) + PrivateIndex = MFI->CreateStackObject(PrivateSize, PrivateAlign, false); + + if (SpillSize != 0) + SpillIndex = MFI->CreateSpillStackObject(SpillSize, SpillAlign); + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + unsigned Opc = MI.getOpcode(); + int AddrIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::address); + if (AddrIdx == -1) { + // All instructions capable of having a FrameIndex should have an + // address operand. + continue; + } + + MachineOperand &Base = MI.getOperand(AddrIdx); + if (!Base.isFI()) + continue; + + int Index = Base.getIndex(); + MachineOperand &Offset = MI.getOperand(AddrIdx + 2); + int64_t OrigOffset = Offset.getImm(); + + if (MFI->isSpillSlotObjectIndex(Index)) { + Base.setIndex(SpillIndex); + Offset.setImm(SpillMap[Index] + OrigOffset); + } else { + Base.setIndex(PrivateIndex); + Offset.setImm(PrivateMap[Index] + OrigOffset); + } + } + } +} Index: lib/Target/HSAIL/HSAILFusion.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILFusion.td @@ -0,0 +1,532 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Fused instruction patterns +// +// This file contains optimizations, rather than definitions +// essential for code generation.
+// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// llc command line options predicates +def EnableFPMAD : Predicate<"CurDAG->getTarget().Options.LessPreciseFPMAD()">; +def UnsafeFPMath : Predicate<"CurDAG->getTarget().Options.UnsafeFPMath">; +def NoInfsFPMath : Predicate<"CurDAG->getTarget().Options.NoInfsFPMath">; +def NoNaNsFPMath : Predicate<"CurDAG->getTarget().Options.NoNaNsFPMath">; + +//////////////////////////////////////////////////////////////////////////////// +// fused operation multiclasses +multiclass TernaryFusedPairOp { + def _rrr : Pat< + (op1 (op2 RC:$src0, RC:$src1), RC:$src2), + (inst RC:$src0, RC:$src1, RC:$src2, bt) + >; + + def _rri : Pat< + (op1 (op2 RC:$src0, RC:$src1), (Ty ImmTy:$src2)), + (inst RC:$src0, RC:$src1, ImmTy:$src2, bt) + >; + + def _rir : Pat< + (op1 (op2 RC:$src0, (Ty ImmTy:$src1)), RC:$src2), + (inst RC:$src0, ImmTy:$src1, RC:$src2, bt) + >; + + def _rii : Pat< + (op1 (op2 RC:$src0, (Ty ImmTy:$src1)), (Ty ImmTy:$src2)), + (inst RC:$src0, ImmTy:$src1, ImmTy:$src2, bt) + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// fused multiply-add +def getShiftMult32 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(1u << (N->getZExtValue()), SDLoc(N), MVT::i32); +}]>; + +def getShiftMult64 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(((uint64_t)1u) << (N->getZExtValue()), + SDLoc(N), MVT::i64); +}]>; + +def shl32imm : PatLeaf<(imm), [{ + return N->getZExtValue() < 32; +}], getShiftMult32>; + +def shl64imm : PatLeaf<(imm), [{ + return N->getZExtValue() < 64; +}], getShiftMult64>; + + +// Pre-optimized (const1 + (x << const2)) -> const1 | (x << const2). +// This requires a mutual check of const1 and const2 to ensure the highest bit +// set in const1 is below bit const2 (i.e. the or can be changed to an add). +def orShlAsMad : PatFrag< + (ops node:$reg, node:$shift, node:$mask), + (or (shl node:$reg, node:$shift), node:$mask), [{ + ConstantSDNode *CNShift, *CNMask; + if ((CNShift = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1))) && + (CNMask = dyn_cast<ConstantSDNode>(N->getOperand(1)))) { + return (CNMask->getZExtValue() >> CNShift->getZExtValue()) == 0; + } + return false; +}]>; + +// 32 bit integer multiplication is an expensive operation on current HW; +// the cost of a 32 bit mul is 4 times higher than the cost of an add. Therefore +// shift + add fusion is commented out as not beneficial at the moment. +// defm m2ad_u32 : TernaryFusedShlAdd<"mad_u32", i32, i32imm, shl32imm, GPR32>; +// +// There are no 64 bit muls and mads in SI, but there are 64 bit shifts and +// adds. Reversing shifts from the multiplier is an unneeded burden for SC. +// Therefore folding of shift + add is commented out for 64 bit ops unless we +// have HW supporting 64 bit mads. +// +// let Predicates = [EnableOpt] in { +// defm m2ad_u64 : TernaryFusedShlAdd<"mad_u64", i64, i64imm, shl64imm, GPR64>; +// } + +// We do not define 64 bit const1 | (x << const2) folding, as we have 64 bit +// or and shift, but no 64 bit mad. +// As 32 bit integer multiplication is currently expensive, this optimization is +// commented out.
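For reference on the orShlAsMad fragment above: when every set bit of the mask lies below the shift amount (i.e. mask >> shift == 0), or-ing in the mask is identical to adding it, which is what lets the pattern be folded into a mad. A small standalone C++ check (illustration only, separate from the patch itself):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t x = 0x1234u;
  const uint32_t shift = 8;     // const2
  const uint32_t mask = 0xFFu;  // const1, fits entirely below bit 8
  assert((mask >> shift) == 0); // the check orShlAsMad performs
  // With no overlapping bits, or and add produce the same value.
  assert(((x << shift) | mask) == ((x << shift) + mask));
  return 0;
}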
+// let Predicates = [EnableOpt] in { +// def mor_u32 : Pat<(orShlAsMad GPR32:$src0, +// (i32 shl32imm:$src1), (i32 imm:$src2)), +// (umad_rii_u32 GPR32:$src0, shl32imm:$src1, imm:$src2)>; +// } + +let Predicates = [EnableFPMAD] in { + defm : TernaryFusedPairOp; + defm : TernaryFusedPairOp; +} + +//////////////////////////////////////////////////////////////////////////////// +// bit strings +def imm31 : PatLeaf<(imm), [{ + return N->getZExtValue() == 31; +}]>; + +def imm32 : PatLeaf<(imm), [{ + return N->getZExtValue() == 32; +}]>; + +def imm63 : PatLeaf<(imm), [{ + return N->getZExtValue() == 63; +}]>; + +// // Shifts do not need "and {31|63}, shift-bits". +// multiclass ShrOp { + +// def _rr : Pat<(op RC:$src0, (and GPR32:$src1, (i32 ShImm))), +// (!cast(asm##t) RC:$src0, GPR32:$src1)>; + +// def _ir : Pat<(op (Ty imm:$src0), (and GPR32:$src1, (i32 ShImm))), +// (!cast(asm#"_ir"#t) imm:$src0, GPR32:$src1)>; +// } + +// let Predicates = [EnableOpt] in { +// defm shr_u32 : ShrOp<"shr", "_u32", srl, GPR32, i32, imm31>; +// defm shr_s32 : ShrOp<"shr", "_s32", sra, GPR32, i32, imm31>; +// defm shl_u32 : ShrOp<"shl", "_u32", shl, GPR32, i32, imm31>; +// defm shr_u64 : ShrOp<"shr", "_u64", srl, GPR64, i64, imm63>; +// defm shr_s64 : ShrOp<"shr", "_s64", sra, GPR64, i64, imm63>; +// defm shl_u64 : ShrOp<"shl", "_u64", shl, GPR64, i64, imm63>; +// } + +def popCnt : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), + SDLoc(N), MVT::i32); +}]>; + +def isMask : PatLeaf<(imm), [{ + return isMask_64(N->getZExtValue()); +}]>; + +// Extract masks like (val & 0b0001111000) >> 3 +// Most common use looks like: (x & 0xFF00) >> 8 +class BitExtractOp : Pat< + (and (srl Ty:$src0, (i32 (GPROrImm i32:$src1))), (Ty isMask:$src2)), + (bitextractInst $src0, $src1, (i32 (popCnt $src2)), bt) +>; + +// No signed extract operations are defined since HSAIL specifies extract as +// left + right shifts rather than right shift + and. +let Predicates = [EnableOpt], AddedComplexity = 10 in { + def : BitExtractOp; + def : BitExtractOp; +} + +// BFI +def bfiImmIRR : PatFrag< + (ops node:$src0, node:$src1, node:$src2, node:$src4), + (or (and node:$src1, node:$src0), (and node:$src2, node:$src4)), [{ + // check if src1 == ~src4 + ConstantSDNode *CN1, *CN2; + if ((CN1 = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1))) && + (CN2 = dyn_cast<ConstantSDNode>(N->getOperand(1)->getOperand(1)))) { + return (CN1->getSExtValue() == ~(CN2->getSExtValue())); + } + return false; +}]>; + +def bfiImmIIR3 : PatFrag< + (ops node:$src0, node:$src1, node:$src2), + (xor (xor node:$src2, (and node:$src2, node:$src0)), node:$src1), [{ + // Check if src1 & src0 == src1. + ConstantSDNode *CN1, *CN2; + if ((CN1 = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1)-> + getOperand(1))) && + (CN2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))) { + uint64_t c2 = CN2->getZExtValue(); + return (CN1->getZExtValue() & c2) == c2; + } + return false; +}]>; + +// FIXME: These patterns are pretty fragile and break by commuting +// operands of sources. Many of them fail on canonicalized IR for the +// pattern they match.
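As a reference for the BitSelect patterns that follow: bitselect computes (s0 & s1) | (~s0 & s2) bitwise, and the alternative forms matched below are algebraic rewrites of that same expression. A standalone C++ sketch of the equivalences (illustration only, separate from the patch itself; the helper name bitselect_ref is made up):

#include <cassert>
#include <cstdint>

// Reference semantics: for each bit, pick s1 where s0 is 1, else s2.
static uint32_t bitselect_ref(uint32_t s0, uint32_t s1, uint32_t s2) {
  return (s0 & s1) | (~s0 & s2);
}

int main() {
  const uint32_t s0 = 0xF0F0F0F0u, s1 = 0x12345678u, s2 = 0x9ABCDEF0u;
  // The "_rrr2" style form: s2 ^ (s0 & (s1 ^ s2)).
  assert(bitselect_ref(s0, s1, s2) == (s2 ^ (s0 & (s1 ^ s2))));
  // The "_rrr3" style form: ((s0 & s2) ^ s2) ^ (s0 & s1).
  assert(bitselect_ref(s0, s1, s2) == (((s0 & s2) ^ s2) ^ (s0 & s1)));
  return 0;
}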
+multiclass BitSelect { + def _rrr : Pat< + (or (and Ty:$src0, Ty:$src1), + (and Ty:$src2, (not Ty:$src0))), + (bitselectInst $src0, $src1, $src2, bt) + >; + + def _irr : Pat< + (bfiImmIRR (Ty imm:$src0), Ty:$src1, Ty:$src2, (Ty imm)), (bitselectInst imm:$src0, $src1, $src2, bt) + >; + + def _rir : Pat< + (or (and Ty:$src0, (Ty imm:$src1)), (and Ty:$src2, (not Ty:$src0))), + (bitselectInst $src0, imm:$src1, $src2, bt) + >; + + def _rii : Pat< + (or (and Ty:$src0, (Ty imm:$src1)), + (and (not Ty:$src0), (Ty imm:$src2))), + (bitselectInst $src0, imm:$src1, imm:$src2, bt) + >; + + // Alternative rii pattern: (src0 & src1) | ((src0 & src2) ^ src2) + def _rii1 : Pat< + (or (and Ty:$src0, (Ty imm:$src1)), + (xor (and Ty:$src0, (Ty imm:$src2)), (Ty imm:$src2))), + (bitselectInst $src0, imm:$src1, imm:$src2, bt) + >; + + def _rri : Pat< + (or (and Ty:$src0, Ty:$src1), + (and (not Ty:$src0), (Ty imm:$src2))), + (bitselectInst $src0, $src1, imm:$src2, bt) + >; + + // Alternative rri pattern: (src0 & src1) | ((src0 & src2) ^ src2) + def _rri1 : Pat< + (or (and Ty:$src0, Ty:$src1), + (xor (and Ty:$src0, (Ty imm:$src2)), (Ty imm:$src2))), + (bitselectInst $src0, $src1, imm:$src2, bt) + >; + + // Alternative pattern: (src2 ^ (src0 & (src1 ^ src2))) + let AddedComplexity = 10 in { + def _rrr2 : Pat< + (xor Ty:$src2, (and Ty:$src0, (xor Ty:$src1, Ty:$src2))), + (bitselectInst $src0, $src1, $src2, bt) + >; + } + + let AddedComplexity = 11 in { + // XXX - This is higher priority to fold the immediate. + def _irr2 : Pat< + (xor Ty:$src2, (and (xor Ty:$src1, Ty:$src2), imm:$src0)), + (bitselectInst imm:$src0, $src1, $src2, bt) + >; + + def _iir2 : Pat< + (xor Ty:$src2, (and (xor Ty:$src2, (Ty imm:$src1)), (Ty imm:$src0))), + (bitselectInst imm:$src0, imm:$src1, $src2, bt) + >; + + def _rir2 : Pat< + (xor Ty:$src2, (and Ty:$src0, (xor Ty:$src2, (Ty imm:$src1)))), + (bitselectInst $src0, imm:$src1, $src2, bt) + >; + + def _rri2 : Pat< + (xor (and Ty:$src0, (xor Ty:$src1, (Ty imm:$src2))), (Ty imm:$src2)), + (bitselectInst $src0, $src1, imm:$src2, bt) + >; + } + + // Alternative pattern: ((src0 & src2) ^ src2) ^ (src0 & src1) + let AddedComplexity = 4 in { + def _rrr3 : Pat< + (xor (xor Ty:$src2, (and Ty:$src0, Ty:$src2)), (and Ty:$src0, Ty:$src1)), + (bitselectInst $src0, $src1, $src2, bt) + >; + } + + let AddedComplexity = 5 in { + def _irr3 : Pat< + (xor (xor Ty:$src2, (and Ty:$src2, (Ty imm:$src0))), + (and Ty:$src1, (Ty imm:$src0))), + (bitselectInst imm:$src0, $src1, $src2, bt) + >; + + def _iir3 : Pat< + (bfiImmIIR3 (Ty imm:$src0), (Ty imm:$src1), Ty:$src2), + (bitselectInst imm:$src0, imm:$src1, $src2, bt) + >; + } + + def _rri3 : Pat< + (xor (xor (and Ty:$src0, (Ty imm:$src2)), (Ty imm:$src2)), + (and Ty:$src0, Ty:$src1)), + (bitselectInst $src0, $src1, imm:$src2, bt) + >; + + def _rii3 : Pat< + (xor (xor (and Ty:$src0, (Ty imm:$src2)), (Ty imm:$src2)), + (and Ty:$src0, (Ty imm:$src1))), + (bitselectInst $src0, imm:$src1, imm:$src2, bt) + >; +} + +let Predicates = [EnableOpt] in { +defm : BitSelect; +defm : BitSelect; +} + +// pack + +let Predicates = [EnableOpt], AddedComplexity = 5 in { + def : Pat< + (shl (i64 (anyext i32:$src)), (i32 32)), + (PACK_U32X2_U32 (i64 0), $src, (i32 1), BrigType.U32X2, BrigType.U32) + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// reciprocal + +def fp32imm1 : PatLeaf<(f32 fpimm), [{ + return N->isExactlyValue(+1.0); +}]>; + +def fp64imm1 : PatLeaf<(f64 fpimm), [{ + return N->isExactlyValue(+1.0); +}]>; + +def fp32imm_minus1 : 
PatLeaf<(f32 fpimm), [{ + return N->isExactlyValue(-1.0); +}]>; + +def fp64imm_minus1 : PatLeaf<(f64 fpimm), [{ + return N->isExactlyValue(-1.0); +}]>; + +let Predicates = [UnsafeFPMath] in { + // Pure 1.0 / x + let AddedComplexity = 5 in { + def : Pat< + (fdiv fp32imm1, f32:$src), + (NRCP_F32 $src, BrigType.F32) + >; + + def : Pat< + (fdiv fp64imm1, f64:$src), + (NRCP_F64 $src, BrigType.F64) + >; + } + + // -1.0 / x + let AddedComplexity = 4 in { + def : Pat< + (fdiv fp32imm_minus1, f32:$src), + (NEG_F32 (f32 (NRCP_F32 $src, BrigType.F32)), BrigType.F32) + >; + + def : Pat< + (fdiv fp64imm_minus1, f64:$src), + (NEG_F64 (f64 (NRCP_F64 $src, BrigType.F64)), BrigType.F64) + >; + } + + let AddedComplexity = 5 in { + def : Pat< + (fdiv fp32imm_minus1, (fneg f32:$src)), + (NRCP_F32 $src, BrigType.F32) + >; + + def : Pat< + (fdiv fp64imm_minus1, (fneg f64:$src)), + (NRCP_F64 $src, BrigType.F64) + >; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// rsqrt + +let Predicates = [UnsafeFPMath] in { + // Pure 1.0 / sqrt(x) + let AddedComplexity = 15 in { + def : Pat< + (fdiv fp32imm1, (int_HSAIL_nsqrt_f32 f32:$src)), + (NRSQRT_F32 $src, BrigType.F32) + >; + + def : Pat< + (fdiv fp64imm1, (int_HSAIL_nsqrt_f64 f64:$src)), + (NRSQRT_F64 $src, BrigType.F64) + >; + } + + let AddedComplexity = 10 in { + def : Pat< + (fdiv f32:$src0, (int_HSAIL_nsqrt_f32 f32:$src1)), + (f32 (MUL_F32 1, 0, $src0, (f32 (NRSQRT_F32 $src1, BrigType.F32)), BrigType.F32)) + >; + + def : Pat< + (f32 (fdiv fpimm:$src0, (int_HSAIL_nsqrt_f32 f32:$src1))), + (f32 (MUL_F32 1, 0, fpimm:$src0, (f32 (NRSQRT_F32 $src1, BrigType.F32)), BrigType.F32)) + >; + + def : Pat< + (f64 (fdiv GPR64:$src0, (int_HSAIL_nsqrt_f64 f64:$src1))), + (f64 (MUL_F64 0, 0, $src0, (f64 (NRSQRT_F64 $src1, BrigType.F64)), BrigType.F64)) + >; + + def : Pat< + (f64 (fdiv fpimm:$src0, (int_HSAIL_nsqrt_f64 f64:$src1))), + (f64 (MUL_F64 0, 0, fpimm:$src0, (f64 (NRSQRT_F64 (f64 GPR64:$src1), BrigType.F64)), BrigType.F64)) + >; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Min/Max + +// A 'setcc' node with a single use. +def setcc_su : PatFrag<(ops node:$lhs, node:$rhs, node:$cc), + (setcc node:$lhs, node:$rhs, node:$cc), [{ + return N->hasOneUse(); +}]>; + +multiclass minmax { + def : Pat< + (select (i1 (setcc_su Ty:$src0, Ty:$src1, cc12)), Ty:$src0, Ty:$src1), + (inst ftz, round, $src0, $src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su Ty:$src0, ImmTy:$src1, cc12)), Ty:$src0, ImmTy:$src1), + (inst ftz, round, $src0, ImmTy:$src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su ImmTy:$src0, Ty:$src1, cc12)), ImmTy:$src0, Ty:$src1), + (inst ftz, round, ImmTy:$src0, $src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su Ty:$src0, Ty:$src1, cc21)), Ty:$src1, Ty:$src0), + (inst ftz, round, $src0, $src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su Ty:$src0, ImmTy:$src1, cc21)), ImmTy:$src1, Ty:$src0), + (inst ftz, round, $src0, ImmTy:$src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su ImmTy:$src0, Ty:$src1, cc21)), Ty:$src1, ImmTy:$src0), + (inst ftz, round, ImmTy:$src0, $src1, bt) + >; +} + +// TODO: This should be moved to a DAG combine. This currently gets +// confused by canonicalizations of a compare with a constant. le/ge +// comparisons with a constant are canonicalized to lt/gt with the +// constant incremented, which breaks the simple pattern. 
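For reference, here is a minimal standalone C++ sketch (not taken from this patch) of the canonicalization described in the comment above: a le/ge compare against a constant is rewritten as lt/gt against the incremented constant, so the compare constant no longer matches the select operand and the select/setcc form can no longer be recognized as a min or max by simple operand matching.

#include <cassert>
#include <cstdint>

// Form the minmax patterns above expect: select(setcc(x, 7, setle), x, 7).
static int32_t minAsWritten(int32_t x) { return x <= 7 ? x : 7; }

// Form after canonicalization: select(setcc(x, 8, setlt), x, 7).
// The compare constant (8) no longer equals the select operand (7),
// so the operand-matching patterns above do not fire.
static int32_t minAfterCanonicalization(int32_t x) { return x < 8 ? x : 7; }

int main() {
  for (int32_t x = -16; x <= 16; ++x)
    assert(minAsWritten(x) == minAfterCanonicalization(x));
  return 0;
}

Moving the recognition into a DAG combine, as the TODO suggests, would allow the incremented constant to be undone before matching.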
+let Predicates = [EnableOpt] in { + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; +} + +// Abs +let Predicates = [EnableOpt] in { + def : Pat< + (xor (add (sra i32:$src, (i32 31)), i32:$src), + (sra i32:$src, (i32 31))), + (ABS_S32 0, 0, $src, BrigType.S32) + >; + + def : Pat< + (xor (add (sra i64:$src, (i32 63)), i64:$src), + (sra i64:$src, (i32 63))), + (ABS_S64 0, 0, $src, BrigType.S64) + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// fadd y (fmul x, 1) +multiclass FusedAddMul1 { + def _rri : Pat< + (fadd Ty:$src0, (fmul Ty:$src1, one)), + (op ftz, round, $src0, $src1, bt) + >; + + def _iri : Pat< + (fadd (fmul Ty:$src0, one), (Ty fpimm:$src1)), + (op ftz, round, $src0, (Ty fpimm:$src1), bt) + >; +} + +// FIXME: Change to default rounding mode +let Predicates = [EnableOpt] in { + defm addmul1_f32 : FusedAddMul1; + defm addmul1_f64 : FusedAddMul1; +} + +let Predicates = [EnableOpt] in { + def : Pat< + (or (shl i32:$src0, i32:$src1), + (srl i32:$src0, (sub imm32, (and i32:$src1, imm31)))), + (BITALIGN_B32 $src0, $src0, (i32 (NEG_S32 $src1, BrigType.S32)), BrigType.B32) + >; +} Index: lib/Target/HSAIL/HSAILISelDAGToDAG.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILISelDAGToDAG.cpp @@ -0,0 +1,1188 @@ +//=- HSAILISelDAGToDAG.cpp - A DAG pattern matching inst selector for HSAIL -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a DAG pattern matching instruction selector for HSAIL, +// converting from a legalized dag to a HSAIL dag. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hsail-isel" +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILInstrInfo.h" +#include "HSAILMachineFunctionInfo.h" +#include "HSAILRegisterInfo.h" +#include "HSAILSubtarget.h" +#include "HSAILTargetMachine.h" +#include "HSAILUtilityFunctions.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Pattern Matcher Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { +void initializeHSAILDAGToDAGISelPass(PassRegistry &); +} + +namespace { +//===--------------------------------------------------------------------===// +/// ISel - HSAIL specific code to select HSAIL machine instructions for +/// SelectionDAG operations. +/// +class HSAILDAGToDAGISel : public SelectionDAGISel { + /// Subtarget - Keep a pointer to the HSAILSubtarget around so that we can + /// make the right decision when generating code for different targets. 
+ const HSAILSubtarget *Subtarget; + +public: + explicit HSAILDAGToDAGISel(TargetMachine &TM) + : SelectionDAGISel(TM), Subtarget(nullptr) {} + + virtual ~HSAILDAGToDAGISel() {} + + const char *getPassName() const override { + return "HSAIL DAG->DAG Instruction Selection"; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; + +private: + SDNode *Select(SDNode *N) override; + + SDNode *SelectINTRINSIC_WO_CHAIN(SDNode *Node); + SDNode *SelectINTRINSIC_W_CHAIN(SDNode *Node); + + SDNode *SelectImageIntrinsic(SDNode *Node); + SDNode *SelectActiveLaneMask(SDNode *Node); + // Helper for SelectAddrCommon + // Checks that OR operation is semantically equivalent to ADD + bool IsOREquivalentToADD(SDValue Op) const; + + bool SelectAddrCommon(SDValue Addr, SDValue &Base, SDValue &Reg, + int64_t &Offset, MVT ValueType, int Depth) const; + + bool SelectAddr(SDValue N, SDValue &Base, SDValue &Reg, + SDValue &Offset) const; + + bool SelectLoadAddr(SDNode *ParentLoad, SDValue Addr, SDValue &Base, + SDValue &Reg, SDValue &Offset, SDValue &Segment, + SDValue &Align, SDValue &Type, SDValue &Width, + SDValue &ModifierMask) const; + + bool SelectStoreAddr(SDNode *ParentStore, SDValue Addr, SDValue &Base, + SDValue &Reg, SDValue &Offset, SDValue &Segment, + SDValue &Align, + /*SDValue &Equiv,*/ + SDValue &Type) const; + + bool SelectAtomicAddr(SDNode *ParentLoad, SDValue Addr, SDValue &Segment, + SDValue &Order, SDValue &Scope, SDValue &Equiv, + + SDValue &Base, SDValue &Reg, SDValue &Offset) const; + + void SelectAddrSpaceCastCommon(const AddrSpaceCastSDNode &ASC, + SDValue &NoNull, SDValue &Ptr, + SDValue &DestType, SDValue &SrcType) const; + + SDNode *SelectAddrSpaceCast(AddrSpaceCastSDNode *ASC) const; + + SDNode *SelectSetCC(SDNode *SetCC) const; + + SDNode *SelectArgLd(MemSDNode *SetCC) const; + SDNode *SelectArgSt(MemSDNode *SetCC) const; + + bool SelectGPROrImm(SDValue In, SDValue &Src) const; + bool MemOpHasPtr32(SDNode *N) const; + + bool isKernelFunc(void) const; +// Include the pieces autogenerated from the target description. +#include "HSAILGenDAGISel.inc" +}; +} + +static BrigType getBrigType(MVT::SimpleValueType VT, bool Signed) { + switch (VT) { + case MVT::i32: + return Signed ? BRIG_TYPE_S32 : BRIG_TYPE_U32; + case MVT::f32: + return BRIG_TYPE_F32; + case MVT::i8: + return Signed ? BRIG_TYPE_S8 : BRIG_TYPE_U8; + case MVT::i16: + return Signed ? BRIG_TYPE_S16 : BRIG_TYPE_U16; + case MVT::i64: + return Signed ? 
BRIG_TYPE_S64 : BRIG_TYPE_U64; + case MVT::f64: + return BRIG_TYPE_F64; + case MVT::i1: + return BRIG_TYPE_B1; + default: + llvm_unreachable("Unhandled type for MVT -> BRIG"); + } +} + +static BrigType getBrigTypeFromStoreType(MVT::SimpleValueType VT) { + switch (VT) { + case MVT::i32: + return BRIG_TYPE_U32; + case MVT::f32: + return BRIG_TYPE_F32; + case MVT::i8: + return BRIG_TYPE_U8; + case MVT::i16: + return BRIG_TYPE_U16; + case MVT::i64: + return BRIG_TYPE_U64; + case MVT::f64: + return BRIG_TYPE_F64; + default: + llvm_unreachable("Unhandled type for MVT -> BRIG"); + } +} + +bool HSAILDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + return true; +} + +bool HSAILDAGToDAGISel::SelectGPROrImm(SDValue In, SDValue &Src) const { + if (ConstantSDNode *C = dyn_cast(In)) { + SDLoc SL(In); + Src = CurDAG->getTargetConstant(C->getAPIntValue(), SL, C->getValueType(0)); + } else if (ConstantFPSDNode *C = dyn_cast(In)) { + SDLoc SL(In); + Src = CurDAG->getTargetConstantFP(C->getValueAPF(), SL, C->getValueType(0)); + } else + Src = In; + + return true; +} + +bool HSAILDAGToDAGISel::isKernelFunc() const { + const MachineFunction &MF = CurDAG->getMachineFunction(); + return HSAIL::isKernelFunc(MF.getFunction()); +} + +static unsigned getImageInstr(HSAILIntrinsic::ID intr) { + switch (intr) { + default: + llvm_unreachable("unexpected intrinsinc ID for images"); + case HSAILIntrinsic::HSAIL_rd_imgf_1d_f32: + return HSAIL::rd_imgf_1d_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_1d_s32: + return HSAIL::rd_imgf_1d_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_1da_f32: + return HSAIL::rd_imgf_1da_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_1da_s32: + return HSAIL::rd_imgf_1da_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_2d_f32: + return HSAIL::rd_imgf_2d_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_2d_s32: + return HSAIL::rd_imgf_2d_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_2da_f32: + return HSAIL::rd_imgf_2da_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_2da_s32: + return HSAIL::rd_imgf_2da_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_3d_f32: + return HSAIL::rd_imgf_3d_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_3d_s32: + return HSAIL::rd_imgf_3d_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_1d_f32: + return HSAIL::rd_imgi_1d_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_1d_s32: + return HSAIL::rd_imgi_1d_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_1da_f32: + return HSAIL::rd_imgi_1da_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_1da_s32: + return HSAIL::rd_imgi_1da_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_2d_f32: + return HSAIL::rd_imgi_2d_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_2d_s32: + return HSAIL::rd_imgi_2d_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_2da_f32: + return HSAIL::rd_imgi_2da_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_2da_s32: + return HSAIL::rd_imgi_2da_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_3d_f32: + return HSAIL::rd_imgi_3d_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_3d_s32: + return HSAIL::rd_imgi_3d_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_1d_f32: + return HSAIL::rd_imgui_1d_f32; + case HSAILIntrinsic::HSAIL_rd_imgui_1d_s32: + return HSAIL::rd_imgui_1d_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_1da_f32: + return HSAIL::rd_imgui_1da_f32; + case HSAILIntrinsic::HSAIL_rd_imgui_1da_s32: + return HSAIL::rd_imgui_1da_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_2d_f32: + return HSAIL::rd_imgui_2d_f32; + case HSAILIntrinsic::HSAIL_rd_imgui_2d_s32: + return HSAIL::rd_imgui_2d_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_2da_f32: + return HSAIL::rd_imgui_2da_f32; + case 
HSAILIntrinsic::HSAIL_rd_imgui_2da_s32: + return HSAIL::rd_imgui_2da_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_3d_f32: + return HSAIL::rd_imgui_3d_f32; + case HSAILIntrinsic::HSAIL_rd_imgui_3d_s32: + return HSAIL::rd_imgui_3d_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_f32: + return HSAIL::rd_imgf_2ddepth_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_s32: + return HSAIL::rd_imgf_2ddepth_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_f32: + return HSAIL::rd_imgf_2dadepth_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_s32: + return HSAIL::rd_imgf_2dadepth_s32; + + case HSAILIntrinsic::HSAIL_ld_imgf_1d_u32: + return HSAIL::ld_imgf_1d_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_1da_u32: + return HSAIL::ld_imgf_1da_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_1db_u32: + return HSAIL::ld_imgf_1db_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_2d_u32: + return HSAIL::ld_imgf_2d_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_2da_u32: + return HSAIL::ld_imgf_2da_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_3d_u32: + return HSAIL::ld_imgf_3d_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_1d_u32: + return HSAIL::ld_imgi_1d_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_1da_u32: + return HSAIL::ld_imgi_1da_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_1db_u32: + return HSAIL::ld_imgi_1db_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_2d_u32: + return HSAIL::ld_imgi_2d_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_2da_u32: + return HSAIL::ld_imgi_2da_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_3d_u32: + return HSAIL::ld_imgi_3d_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_1d_u32: + return HSAIL::ld_imgui_1d_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_1da_u32: + return HSAIL::ld_imgui_1da_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_1db_u32: + return HSAIL::ld_imgui_1db_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_2d_u32: + return HSAIL::ld_imgui_2d_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_2da_u32: + return HSAIL::ld_imgui_2da_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_3d_u32: + return HSAIL::ld_imgui_3d_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_2ddepth_u32: + return HSAIL::ld_imgf_2ddepth_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_2dadepth_u32: + return HSAIL::ld_imgf_2dadepth_u32; + } +} + +SDNode *HSAILDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *Node) { + unsigned IntID = cast(Node->getOperand(0))->getZExtValue(); + switch (IntID) { + case HSAILIntrinsic::HSAIL_ftz_f32: { + SDLoc SL(Node); + + // This is a workaround for not being able to create fpimm in an output + // pattern. + const SDValue Ops[] = { + CurDAG->getTargetConstant(1, SL, MVT::i1), // ftz + CurDAG->getTargetConstant(BRIG_ROUND_FLOAT_DEFAULT, SL, MVT::i32), // round + Node->getOperand(1), // src0 + CurDAG->getConstantFP(0.0, SL, MVT::f32), // src1 + CurDAG->getTargetConstant(BRIG_TYPE_F32, SL, MVT::i32) // TypeLength + }; + + return CurDAG->SelectNodeTo(Node, HSAIL::ADD_F32, MVT::f32, Ops); + } + case HSAILIntrinsic::HSAIL_mul_ftz_f32: { + SDLoc SL(Node); + + // This is a workaround for not being able to create fpimm in an output + // pattern. 
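+    // Select a multiply of the source by 1.0f (bit pattern 0x3f800000) with
+    // the ftz modifier set; the multiply is an identity apart from flushing
+    // denormals to zero.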
+ const SDValue Ops[] = { + CurDAG->getTargetConstant(1, SL, MVT::i1), // ftz + CurDAG->getTargetConstant(BRIG_ROUND_FLOAT_DEFAULT, SL, MVT::i32), // round + Node->getOperand(1), // src0 + CurDAG->getConstantFP(BitsToFloat(0x3f800000), SL, MVT::f32), // src1 + CurDAG->getTargetConstant(BRIG_TYPE_F32, SL, MVT::i32) // TypeLength + }; + + return CurDAG->SelectNodeTo(Node, HSAIL::MUL_F32, MVT::f32, Ops); + } + default: + return SelectCode(Node); + } +} + +SDNode *HSAILDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *Node) { + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + if (HSAILIntrinsicInfo::isReadImage((HSAILIntrinsic::ID)IntNo) || + HSAILIntrinsicInfo::isLoadImage((HSAILIntrinsic::ID)IntNo)) + return SelectImageIntrinsic(Node); + + return SelectCode(Node); +} + +SDNode *HSAILDAGToDAGISel::SelectImageIntrinsic(SDNode *Node) { + SDValue Chain = Node->getOperand(0); + SDNode *ResNode; + + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + bool hasSampler = false; + + if (IntNo == HSAILIntrinsic::HSAIL_rd_imgf_1d_s32) { + SDLoc SL; + const SDValue Ops[] = { + CurDAG->getTargetConstant(1, SL, MVT::i1), // v4 + CurDAG->getTargetConstant(BRIG_TYPE_ROIMG, SL, MVT::i32), // imageType + CurDAG->getTargetConstant(BRIG_TYPE_S32, SL, MVT::i32), // coordType + CurDAG->getTargetConstant(BRIG_GEOMETRY_1D, SL, MVT::i32), // geometry + CurDAG->getTargetConstant(0, SL, MVT::i32), // equiv + Node->getOperand(2), // image + Node->getOperand(3), // sampler + Node->getOperand(4), // coordWidth + CurDAG->getTargetConstant(BRIG_TYPE_F32, SL, MVT::i32), // destType + Chain + }; + + return CurDAG->SelectNodeTo(Node, HSAIL::RDIMAGE, Node->getVTList(), Ops); + } + + if (HSAILIntrinsicInfo::isReadImage((HSAILIntrinsic::ID)IntNo)) { + hasSampler = true; + } else if (!HSAILIntrinsicInfo::isLoadImage((HSAILIntrinsic::ID)IntNo)) { + return SelectCode(Node); + } + + if (((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_f32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_s32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_ld_imgf_2ddepth_u32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_f32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_s32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_ld_imgf_2dadepth_u32)) { + assert(Node->getNumValues() == 2); + } else { + assert(Node->getNumValues() == 5); + } + SmallVector NewOps; + + unsigned OpIndex = 2; + + SDValue Img = Node->getOperand(OpIndex++); + int ResNo = Img.getResNo(); + SDValue ImgHandle = Img.getValue(ResNo); + NewOps.push_back(ImgHandle); + + if (hasSampler) { + SDValue Smp = Node->getOperand(OpIndex++); + SDValue SmpHandle = Smp.getValue(Smp.getResNo()); + NewOps.push_back(SmpHandle); + } + + while (OpIndex < Node->getNumOperands()) { + SDValue Coord = Node->getOperand(OpIndex++); + NewOps.push_back(Coord); + } + + NewOps.push_back(Chain); + + ResNode = CurDAG->SelectNodeTo(Node, getImageInstr((HSAILIntrinsic::ID)IntNo), + Node->getVTList(), NewOps); + return ResNode; +} + +SDNode *HSAILDAGToDAGISel::SelectActiveLaneMask(SDNode *Node) { + SDLoc SL(Node); + SDValue Ops[] = { + Node->getOperand(1), // width + Node->getOperand(2), // src0 + CurDAG->getTargetConstant(BRIG_TYPE_B64, SL, MVT::i32), // TypeLength + CurDAG->getTargetConstant(BRIG_TYPE_B1, SL, MVT::i32), // sourceType + Node->getOperand(0) // Chain + }; + + SelectGPROrImm(Ops[1], Ops[1]); + + return CurDAG->SelectNodeTo(Node, 
HSAIL::ACTIVELANEMASK_V4_B64_B1, + Node->getVTList(), Ops); +} + +SDNode *HSAILDAGToDAGISel::Select(SDNode *Node) { + assert(Node); + + EVT NVT = Node->getValueType(0); + unsigned Opcode = Node->getOpcode(); + SDNode *ResNode; + + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); + + if (Node->isMachineOpcode()) { + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); + return nullptr; // Already selected. + } + + switch (Opcode) { + default: + ResNode = SelectCode(Node); + break; + case ISD::SETCC: + ResNode = SelectSetCC(Node); + break; + case ISD::FrameIndex: { + if (FrameIndexSDNode *FIN = dyn_cast(Node)) { + SDLoc SL(Node); + SDValue Ops[] = { + CurDAG->getTargetConstant(HSAILAS::PRIVATE_ADDRESS, SL, MVT::i32), + CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32), + CurDAG->getRegister(0, NVT), + CurDAG->getTargetConstant(0, SL, NVT), + CurDAG->getTargetConstant(BRIG_TYPE_U32, SL, MVT::i32) + }; + + ResNode = CurDAG->SelectNodeTo(Node, HSAIL::LDA_U32, NVT, Ops); + } else { + ResNode = Node; + } + break; + } + case ISD::GlobalAddress: { + const GlobalAddressSDNode *GSDN = cast(Node); + const GlobalValue *GV = GSDN->getGlobal(); + EVT PtrVT = Node->getValueType(0); + unsigned AS = GSDN->getAddressSpace(); + SDLoc SL(Node); + + BrigType BT = (PtrVT == MVT::i32) ? BRIG_TYPE_U32 : BRIG_TYPE_U64; + unsigned Opcode = (PtrVT == MVT::i32) ? HSAIL::LDA_U32 : HSAIL::LDA_U64; + + const SDValue Ops[] = { + CurDAG->getTargetConstant(AS, SL, MVT::i32), + CurDAG->getTargetGlobalAddress(GV, SL, PtrVT, 0), + CurDAG->getRegister(HSAIL::NoRegister, NVT), + CurDAG->getTargetConstant(GSDN->getOffset(), SL, PtrVT), + CurDAG->getTargetConstant(BT, SL, MVT::i32) + }; + + ResNode = CurDAG->SelectNodeTo(Node, Opcode, PtrVT, Ops); + break; + } + case ISD::INTRINSIC_WO_CHAIN: + ResNode = SelectINTRINSIC_WO_CHAIN(Node); + break; + case ISD::INTRINSIC_W_CHAIN: + ResNode = SelectINTRINSIC_W_CHAIN(Node); + break; + + case ISD::CALLSEQ_START: { + // LLVM 3.6 unable to select start/end of call sequence chained with the + // rest of the arg scope operations due to the WalkChainUsers check which + // reports it may induce a cycle in the graph, so select it manually. 
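+    // CALLSEQ_START is rewritten to an arg-scope start: the call-frame size
+    // constant is carried through as src0, and the node produces a chain plus
+    // glue for the argument loads/stores that follow.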
+ ResNode = CurDAG->SelectNodeTo(Node, HSAIL::ARG_SCOPE_START, MVT::Other, + MVT::Glue, + Node->getOperand(1), // src0 + Node->getOperand(0)); // Chain + break; + } + case ISD::CALLSEQ_END: { + const SDValue Ops[] = { + Node->getOperand(1), // src0 + Node->getOperand(2), // src1 + Node->getOperand(0), // Chain + Node->getOperand(3) // Glue + }; + + ResNode = CurDAG->SelectNodeTo(Node, HSAIL::ARG_SCOPE_END, MVT::Other, + MVT::Glue, Ops); + break; + } + case HSAILISD::ARG_LD: { + ResNode = SelectArgLd(cast(Node)); + break; + } + case HSAILISD::ARG_ST: { + ResNode = SelectArgSt(cast(Node)); + break; + } + case ISD::ADDRSPACECAST: { + ResNode = SelectAddrSpaceCast(cast(Node)); + break; + } + case HSAILISD::ACTIVELANEMASK: + return SelectActiveLaneMask(Node); + } + + return ResNode; +} + +bool HSAILDAGToDAGISel::IsOREquivalentToADD(SDValue Op) const { + assert(Op.getOpcode() == ISD::OR); + + SDValue N0 = Op->getOperand(0); + SDValue N1 = Op->getOperand(1); + EVT VT = N0.getValueType(); + + // Highly inspired by (a|b) case in DAGCombiner::visitADD + if (VT.isInteger() && !VT.isVector()) { + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + CurDAG->computeKnownBits(N0, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + CurDAG->computeKnownBits(N1, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return yes. + // If all possibly-set bits on the RHS are clear on the LHS, return yes. + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) + return true; + } + } + + // Fallback to the more conservative check + return CurDAG->isBaseWithConstantOffset(Op); +} + +/// \brief Return true if the pointer is 32-bit in large and small models +static bool addrSpaceHasPtr32(unsigned AS) { + switch (AS) { + default: + return false; + + case HSAILAS::GROUP_ADDRESS: + case HSAILAS::ARG_ADDRESS: + case HSAILAS::PRIVATE_ADDRESS: + case HSAILAS::SPILL_ADDRESS: + return true; + } +} + +/// We accept an SDNode to keep things simple in the TD files. The +/// cast to MemSDNode will never assert because this predicate is only +/// used in a pattern fragment that matches load or store nodes. 
+bool HSAILDAGToDAGISel::MemOpHasPtr32(SDNode *N) const { + return addrSpaceHasPtr32(cast(N)->getAddressSpace()); +} + +bool HSAILDAGToDAGISel::SelectAddrCommon(SDValue Addr, SDValue &Base, + SDValue &Reg, int64_t &Offset, + MVT ValueType, int Depth) const { + if (Depth > 5) + return false; + + SDValue backup_base = Base, backup_reg = Reg; + int64_t backup_offset = Offset; + + switch (Addr.getOpcode()) { + case ISD::Constant: { + int64_t new_offset = cast(Addr)->getSExtValue(); + // No 64 bit offsets in 32 bit target + if (Subtarget->isSmallModel() && !isInt<32>(new_offset)) + return false; + Offset += new_offset; + return true; + } + case ISD::FrameIndex: { + if (Base.getNode() == 0) { + Base = CurDAG->getTargetFrameIndex( + cast(Addr)->getIndex(), ValueType); + return true; + } + break; + } + case ISD::TargetGlobalAddress: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::TargetGlobalTLSAddress: { + if (Base.getNode() == 0) { + Base = CurDAG->getTargetGlobalAddress( + cast(Addr)->getGlobal(), SDLoc(Addr), ValueType); + int64_t new_offset = + Offset + cast(Addr)->getOffset(); + if (Subtarget->isSmallModel() && !isInt<32>(new_offset)) + return false; + Offset += new_offset; + return true; + } + break; + } + case ISD::TargetExternalSymbol: { + if (Base.getNode() == 0) { + Base = Addr; + return true; + } + break; + } + case ISD::OR: // Treat OR as ADD when Op1 & Op2 == 0 + if (IsOREquivalentToADD(Addr)) { + bool can_selec_first_op = SelectAddrCommon(Addr.getOperand(0), Base, Reg, + Offset, ValueType, Depth + 1); + + if (can_selec_first_op && SelectAddrCommon(Addr.getOperand(1), Base, Reg, + Offset, ValueType, Depth + 1)) + return true; + Base = backup_base; + Reg = backup_reg; + Offset = backup_offset; + } + break; + case ISD::ADD: { + bool can_selec_first_op = SelectAddrCommon(Addr.getOperand(0), Base, Reg, + Offset, ValueType, Depth + 1); + + if (can_selec_first_op && SelectAddrCommon(Addr.getOperand(1), Base, Reg, + Offset, ValueType, Depth + 1)) + return true; + Base = backup_base; + Reg = backup_reg; + Offset = backup_offset; + break; + } + case HSAILISD::LDA: { + if (SelectAddrCommon(Addr.getOperand(1), Base, Reg, Offset, ValueType, + Depth + 1)) { + return true; + } + Base = backup_base; + Reg = backup_reg; + Offset = backup_offset; + break; + } + default: + break; + } + + // By default generate address as register + if (Reg.getNode() == 0) { + Reg = Addr; + return true; + } + return false; +} + +/// SelectAddr - returns true if it is able pattern match an addressing mode. +/// It returns the operands which make up the maximal addressing mode it can +/// match by reference. +/// +/// Parent is the parent node of the addr operand that is being matched. It +/// is always a load, store, atomic node, or null. It is only null when +/// checking memory operands for inline asm nodes. 
+bool HSAILDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Reg, + SDValue &Offset) const { + MVT VT = Addr.getValueType().getSimpleVT(); + assert(VT == MVT::i32 || VT == MVT::i64); + + int64_t disp = 0; + if (!SelectAddrCommon(Addr, Base, Reg, disp, VT, 0)) + return false; + + SDLoc SL(Addr); + Offset = CurDAG->getTargetConstant(disp, SL, VT); + if (Base.getNode() == 0) + Base = CurDAG->getRegister(0, VT); + if (Reg.getNode() == 0) + Reg = CurDAG->getRegister(0, VT); + return true; +} + +bool HSAILDAGToDAGISel::SelectLoadAddr(SDNode *ParentLoad, SDValue Addr, + SDValue &Base, SDValue &Reg, + SDValue &Offset, SDValue &Segment, + SDValue &Align, SDValue &Type, + SDValue &Width, + SDValue &ModifierMask) const { + const LoadSDNode *Load = cast(ParentLoad); + assert(!Load->isIndexed()); + + if (!SelectAddr(Addr, Base, Reg, Offset)) + return false; + + unsigned AS = Load->getAddressSpace(); + + SDLoc SL(ParentLoad); + + MVT MemVT = Load->getMemoryVT().getSimpleVT(); + ISD::LoadExtType ExtTy = Load->getExtensionType(); + unsigned BrigType = getBrigType(MemVT.SimpleTy, ExtTy == ISD::SEXTLOAD); + + Segment = CurDAG->getTargetConstant(AS, SL, MVT::i32); + Align = CurDAG->getTargetConstant(Load->getAlignment(), SL, MVT::i32); + Type = CurDAG->getTargetConstant(BrigType, SL, MVT::i32); + Width = CurDAG->getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32); + + // TODO: Set if invariant. + ModifierMask = CurDAG->getTargetConstant(0, SL, MVT::i32); + return true; +} + +bool HSAILDAGToDAGISel::SelectStoreAddr(SDNode *ParentStore, SDValue Addr, + SDValue &Base, SDValue &Reg, + SDValue &Offset, SDValue &Segment, + SDValue &Align, + /*SDValue &Equiv,*/ + SDValue &Type) const { + const StoreSDNode *Store = cast(ParentStore); + assert(!Store->isIndexed()); + + if (!SelectAddr(Addr, Base, Reg, Offset)) + return false; + + unsigned AS = Store->getAddressSpace(); + + MVT MemVT = Store->getMemoryVT().getSimpleVT(); + unsigned BrigType = getBrigTypeFromStoreType(MemVT.SimpleTy); + + SDLoc SL(ParentStore); + + Segment = CurDAG->getTargetConstant(AS, SL, MVT::i32); + Align = CurDAG->getTargetConstant(Store->getAlignment(), SL, MVT::i32); + // Equiv = CurDAG->getTargetConstant(0, SL, MVT::i32); + Type = CurDAG->getTargetConstant(BrigType, SL, MVT::i32); + return true; +} + +static BrigMemoryOrder getBrigMemoryOrder(AtomicOrdering Order) { + switch (Order) { + case Monotonic: + return BRIG_MEMORY_ORDER_RELAXED; + case Acquire: + return BRIG_MEMORY_ORDER_SC_ACQUIRE; + case Release: + return BRIG_MEMORY_ORDER_SC_RELEASE; + case AcquireRelease: + case SequentiallyConsistent: + return BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE; + default: + llvm_unreachable("unhandled memory order"); + } +} + +static BrigMemoryScope getBrigMemoryScope(SynchronizationScope Scope, + unsigned AS) { + switch (Scope) { + case SingleThread: + return BRIG_MEMORY_SCOPE_WORKITEM; + case CrossThread: + // FIXME: This needs to be fixed when LLVM support other scope values. + switch (AS) { + case HSAILAS::GROUP_ADDRESS: + return BRIG_MEMORY_SCOPE_WORKGROUP; + case HSAILAS::REGION_ADDRESS: + return BRIG_MEMORY_SCOPE_AGENT; + default: + return BRIG_MEMORY_SCOPE_SYSTEM; + } + } +} + +bool HSAILDAGToDAGISel::SelectAtomicAddr(SDNode *ParentAtomic, SDValue Addr, + SDValue &Segment, SDValue &Order, + SDValue &Scope, SDValue &Equiv, + + SDValue &Base, SDValue &Reg, + SDValue &Offset) const { + if (!SelectAddr(Addr, Base, Reg, Offset)) + return false; + + const AtomicSDNode *Atomic = cast(ParentAtomic); + + // XXX - What do we do with the failure ordering? 
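+  // Only the success ordering is translated to BRIG; a cmpxchg failure
+  // ordering, if present, is currently dropped.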
+ AtomicOrdering SuccOrder = Atomic->getSuccessOrdering(); + SynchronizationScope SyncScope = Atomic->getSynchScope(); + + unsigned AS = Atomic->getAddressSpace(); + + SDLoc SL(ParentAtomic); + + Segment = CurDAG->getTargetConstant(AS, SL, MVT::i32); + Order = CurDAG->getTargetConstant(getBrigMemoryOrder(SuccOrder), SL, MVT::i32); + Scope = + CurDAG->getTargetConstant(getBrigMemoryScope(SyncScope, AS), SL, MVT::i32); + Equiv = CurDAG->getTargetConstant(0, SL, MVT::i32); + + return true; +} + +void HSAILDAGToDAGISel::SelectAddrSpaceCastCommon( + const AddrSpaceCastSDNode &ASC, SDValue &NoNull, SDValue &Ptr, + SDValue &DestType, SDValue &SrcType) const { + SDLoc SL(&ASC); + + SelectGPROrImm(ASC.getOperand(0), Ptr); + NoNull = CurDAG->getTargetConstant(0, SL, MVT::i1); + + BrigType DestBT = + getBrigType(ASC.getValueType(0).getSimpleVT().SimpleTy, false); + BrigType SrcBT = + getBrigType(Ptr.getValueType().getSimpleVT().SimpleTy, false); + + DestType = CurDAG->getTargetConstant(DestBT, SL, MVT::i32); + SrcType = CurDAG->getTargetConstant(SrcBT, SL, MVT::i32); +} + +SDNode *HSAILDAGToDAGISel::SelectAddrSpaceCast(AddrSpaceCastSDNode *ASC) const { + SDValue Cast(ASC, 0); + unsigned DstAS = ASC->getDestAddressSpace(); + unsigned SrcAS = ASC->getSrcAddressSpace(); + + EVT DestVT = ASC->getValueType(0); + EVT SrcVT = ASC->getOperand(0).getValueType(); + bool Src32 = (SrcVT == MVT::i32); + bool Dst32 = (DestVT == MVT::i32); + + unsigned Opcode; + SDValue Segment, NoNull, Ptr, DestType, SrcType; + + SDLoc SL(ASC); + + if (SrcAS == HSAILAS::FLAT_ADDRESS) { + if (Src32 && Dst32) + Opcode = HSAIL::FTOS_U32_U32; + else if (Src32 && !Dst32) + llvm_unreachable("Pointer size combination should not happen"); + else if (!Src32 && Dst32) + Opcode = HSAIL::FTOS_U32_U64; + else + Opcode = HSAIL::FTOS_U64_U64; + + Segment = CurDAG->getTargetConstant(DstAS, SL, MVT::i32); + SelectAddrSpaceCastCommon(*ASC, NoNull, Ptr, DestType, SrcType); + } else if (DstAS == HSAILAS::FLAT_ADDRESS) { + if (Src32 && Dst32) + Opcode = HSAIL::STOF_U32_U32; + else if (Src32 && !Dst32) + Opcode = HSAIL::STOF_U64_U32; + else if (!Src32 && Dst32) + llvm_unreachable("Pointer size combination should not happen"); + else + Opcode = HSAIL::STOF_U64_U64; + + Segment = CurDAG->getTargetConstant(SrcAS, SL, MVT::i32); + SelectAddrSpaceCastCommon(*ASC, NoNull, Ptr, DestType, SrcType); + } else + return nullptr; + + const SDValue Ops[] = {Segment, NoNull, Ptr, DestType, SrcType}; + + return CurDAG->SelectNodeTo(ASC, Opcode, DestVT, Ops); +} + +static BrigCompareOperation getBrigIntCompare(ISD::CondCode CC, bool &Signed) { + switch (CC) { + case ISD::SETUEQ: + Signed = true; // Sign is irrelevant, use to be consistent. + return BRIG_COMPARE_EQ; + case ISD::SETUGT: + return BRIG_COMPARE_GT; + case ISD::SETUGE: + return BRIG_COMPARE_GE; + case ISD::SETULT: + return BRIG_COMPARE_LT; + case ISD::SETULE: + return BRIG_COMPARE_LE; + case ISD::SETUNE: + Signed = true; // Sign is irrelevant, use to be consistent. 
+ return BRIG_COMPARE_NE; + case ISD::SETEQ: + Signed = true; + return BRIG_COMPARE_EQ; + case ISD::SETGT: + Signed = true; + return BRIG_COMPARE_GT; + case ISD::SETGE: + Signed = true; + return BRIG_COMPARE_GE; + case ISD::SETLT: + Signed = true; + return BRIG_COMPARE_LT; + case ISD::SETLE: + Signed = true; + return BRIG_COMPARE_LE; + case ISD::SETNE: + Signed = true; + return BRIG_COMPARE_NE; + default: + llvm_unreachable("unhandled cond code"); + } +} + +static BrigCompareOperation getBrigFPCompare(ISD::CondCode CC) { + switch (CC) { + case ISD::SETOEQ: + case ISD::SETEQ: + return BRIG_COMPARE_EQ; + case ISD::SETOGT: + case ISD::SETGT: + return BRIG_COMPARE_GT; + case ISD::SETOGE: + case ISD::SETGE: + return BRIG_COMPARE_GE; + case ISD::SETOLT: + case ISD::SETLT: + return BRIG_COMPARE_LT; + case ISD::SETOLE: + case ISD::SETLE: + return BRIG_COMPARE_LE; + case ISD::SETONE: + case ISD::SETNE: + return BRIG_COMPARE_NE; + case ISD::SETO: + return BRIG_COMPARE_NUM; + case ISD::SETUO: + return BRIG_COMPARE_NAN; + case ISD::SETUEQ: + return BRIG_COMPARE_EQU; + case ISD::SETUGT: + return BRIG_COMPARE_GTU; + case ISD::SETUGE: + return BRIG_COMPARE_GEU; + case ISD::SETULT: + return BRIG_COMPARE_LTU; + case ISD::SETULE: + return BRIG_COMPARE_LEU; + case ISD::SETUNE: + return BRIG_COMPARE_NEU; + default: + llvm_unreachable("unhandled cond code"); + } +} + +static unsigned getCmpOpcode(BrigType SrcBT) { + switch (SrcBT) { + case BRIG_TYPE_B1: + return HSAIL::CMP_B1_B1; + case BRIG_TYPE_S32: + return HSAIL::CMP_B1_S32; + case BRIG_TYPE_U32: + return HSAIL::CMP_B1_U32; + case BRIG_TYPE_S64: + return HSAIL::CMP_B1_S64; + case BRIG_TYPE_U64: + return HSAIL::CMP_B1_U64; + case BRIG_TYPE_F32: + return HSAIL::CMP_B1_F32; + case BRIG_TYPE_F64: + return HSAIL::CMP_B1_F64; + default: + llvm_unreachable("Compare of type not supported"); + } +} + +SDNode *HSAILDAGToDAGISel::SelectSetCC(SDNode *SetCC) const { + SDValue LHS, RHS; + + if (!SelectGPROrImm(SetCC->getOperand(0), LHS)) + return nullptr; + + if (!SelectGPROrImm(SetCC->getOperand(1), RHS)) + return nullptr; + + MVT VT = LHS.getValueType().getSimpleVT(); + ISD::CondCode CC = cast(SetCC->getOperand(2))->get(); + + bool Signed = false; + BrigCompareOperation BrigCmp; + + if (VT.isFloatingPoint()) + BrigCmp = getBrigFPCompare(CC); + else + BrigCmp = getBrigIntCompare(CC, Signed); + + SDLoc SL(SetCC); + + SDValue CmpOp = CurDAG->getTargetConstant(BrigCmp, SL, MVT::i32); + SDValue FTZ = CurDAG->getTargetConstant(VT == MVT::f32, SL, MVT::i1); + + // TODO: Should be able to fold conversions into this instead. 
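+  // The destination is always b1 for now; emitting a wider destination type
+  // here would allow a following sext/zext of the result to be folded away.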
+ SDValue DestType = CurDAG->getTargetConstant(BRIG_TYPE_B1, SL, MVT::i32); + + BrigType SrcBT = getBrigType(VT.SimpleTy, Signed); + SDValue SrcType = CurDAG->getTargetConstant(SrcBT, SL, MVT::i32); + + const SDValue Ops[] = {CmpOp, FTZ, LHS, RHS, DestType, SrcType}; + + return CurDAG->SelectNodeTo(SetCC, getCmpOpcode(SrcBT), MVT::i1, Ops); +} + +static unsigned getLoadBrigOpcode(BrigType BT) { + switch (BT) { + case BRIG_TYPE_U32: + return HSAIL::LD_U32; + case BRIG_TYPE_S32: + return HSAIL::LD_S32; + case BRIG_TYPE_F32: + return HSAIL::LD_F32; + case BRIG_TYPE_U64: + return HSAIL::LD_U64; + case BRIG_TYPE_S64: + return HSAIL::LD_S64; + case BRIG_TYPE_F64: + return HSAIL::LD_F64; + case BRIG_TYPE_U8: + return HSAIL::LD_U8; + case BRIG_TYPE_S8: + return HSAIL::LD_S8; + case BRIG_TYPE_U16: + return HSAIL::LD_U16; + case BRIG_TYPE_S16: + return HSAIL::LD_S16; + default: + llvm_unreachable("Unhandled load type"); + } +} + +static unsigned getRArgLoadBrigOpcode(BrigType BT) { + switch (BT) { + case BRIG_TYPE_U32: + return HSAIL::RARG_LD_U32; + case BRIG_TYPE_S32: + return HSAIL::RARG_LD_S32; + case BRIG_TYPE_F32: + return HSAIL::RARG_LD_F32; + case BRIG_TYPE_U64: + return HSAIL::RARG_LD_U64; + case BRIG_TYPE_S64: + return HSAIL::RARG_LD_S64; + case BRIG_TYPE_F64: + return HSAIL::RARG_LD_F64; + case BRIG_TYPE_U8: + return HSAIL::RARG_LD_U8; + case BRIG_TYPE_S8: + return HSAIL::RARG_LD_S8; + case BRIG_TYPE_U16: + return HSAIL::RARG_LD_U16; + case BRIG_TYPE_S16: + return HSAIL::RARG_LD_S16; + default: + llvm_unreachable("Unhandled load type"); + } +} + +static unsigned getStoreBrigOpcode(BrigType BT) { + switch (BT) { + case BRIG_TYPE_U32: + return HSAIL::ST_U32; + case BRIG_TYPE_F32: + return HSAIL::ST_F32; + case BRIG_TYPE_U64: + return HSAIL::ST_U64; + case BRIG_TYPE_F64: + return HSAIL::ST_F64; + case BRIG_TYPE_U8: + return HSAIL::ST_U8; + case BRIG_TYPE_U16: + return HSAIL::ST_U16; + default: + llvm_unreachable("Unhandled load type"); + } +} + +SDNode *HSAILDAGToDAGISel::SelectArgLd(MemSDNode *Node) const { + bool IsRetLd = Node->getConstantOperandVal(3); + bool IsSext = Node->getConstantOperandVal(4); + + SDValue Base, Reg, Offset; + if (!SelectAddr(Node->getOperand(1), Base, Reg, Offset)) + return nullptr; + + MVT MemVT = Node->getMemoryVT().getSimpleVT(); + BrigType BT = getBrigType(MemVT.SimpleTy, IsSext); + + SDLoc SL(Node); + + SDValue Ops[10] = { + Base, Reg, Offset, + CurDAG->getTargetConstant(BT, SL, MVT::i32), // TypeLength + CurDAG->getTargetConstant(Node->getAddressSpace(), SL, MVT::i32), // segment + CurDAG->getTargetConstant(Node->getAlignment(), SL, MVT::i32), // align + Node->getOperand(2), // width + CurDAG->getTargetConstant(0, SL, MVT::i1), // mask + Node->getOperand(0), // Chain + SDValue() + }; + + ArrayRef OpsArr = makeArrayRef(Ops); + + if (Node->getNumOperands() == 6) + Ops[9] = Node->getOperand(5); + else + OpsArr = OpsArr.drop_back(1); + + unsigned Opcode = IsRetLd ? 
getRArgLoadBrigOpcode(BT) : getLoadBrigOpcode(BT); + + return CurDAG->SelectNodeTo(Node, Opcode, Node->getVTList(), OpsArr); +} + +SDNode *HSAILDAGToDAGISel::SelectArgSt(MemSDNode *Node) const { + SDValue Base, Reg, Offset; + if (!SelectAddr(Node->getOperand(2), Base, Reg, Offset)) + return nullptr; + + SDLoc SL(Node); + + MVT MemVT = Node->getMemoryVT().getSimpleVT(); + BrigType BT = getBrigType(MemVT.SimpleTy, false); + + SDValue Ops[9] = { + Node->getOperand(1), + Base, Reg, Offset, + CurDAG->getTargetConstant(BT, SL, MVT::i32), // TypeLength + CurDAG->getTargetConstant(Node->getAddressSpace(), SL, MVT::i32), // segment + CurDAG->getTargetConstant(Node->getAlignment(), SL, MVT::i32), // align + Node->getOperand(0), // Chain + SDValue() + }; + + ArrayRef OpsArr = makeArrayRef(Ops); + + if (Node->getNumOperands() == 4) + Ops[8] = Node->getOperand(3); + else + OpsArr = OpsArr.drop_back(1); + + return CurDAG->SelectNodeTo(Node, getStoreBrigOpcode(BT), Node->getVTList(), + OpsArr); +} + +FunctionPass *llvm::createHSAILISelDag(TargetMachine &TM) { + return new HSAILDAGToDAGISel(TM); +} Index: lib/Target/HSAIL/HSAILISelLowering.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILISelLowering.h @@ -0,0 +1,228 @@ +//===---- HSAILISelLowering.h - HSAIL DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes how to lower LLVM code to machine code. This has two +// main components: +// +// 1. Which ValueTypes are natively supported by the target. +// 2. Which operations are supported for supported ValueTypes. +// 3. Cost thresholds for alternative implementations of certain operations. +// +// In addition it has a few other components, like information about FP +// immediates. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_LOWERING_H_ +#define _HSAIL_LOWERING_H_ + +#include "HSAILRegisterInfo.h" +#include "HSAILMachineFunctionInfo.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class HSAILSubtarget; + +class HSAILTargetLowering : public TargetLowering { +private: + const HSAILSubtarget *Subtarget; + const HSAILRegisterInfo *RegInfo; + const DataLayout *DL; + +public: + explicit HSAILTargetLowering(HSAILTargetMachine &TM, + const HSAILSubtarget &ST); + virtual ~HSAILTargetLowering(); + + EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + + const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; + + uint8_t getRepRegClassCostFor(MVT VT) const override; + + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + + bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace = 0, + unsigned Align = 1, + bool * /*Fast*/ = nullptr) const override; + + unsigned getJumpTableEncoding() const override; + + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG, + unsigned Depth = 0) const override; + + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; + + bool isLoadBitCastBeneficial(EVT load, EVT bitcast) const override; + + virtual bool + isVectorToScalarLoadStoreWidenBeneficial(unsigned Width, EVT WidenVT, + const MemSDNode *N) const; + +protected: + /// Recursively lower a single argument. + /// Either Ins or Outs must non-zero, which means we are doing argument load + /// or store. + /// ArgNo is an index to InVals and OutVals, which is advanced after the call. + /// AS is an address space of argument, either arg or kernarg + /// ParamPtr is a pointer value for argument to load from or store to. + /// Offset is a value which has to be added to the pointer. + /// If InFlag is present glue all operations. + /// If ChainLink is true chain link all operations. + /// Returns last operation value. + SDValue LowerArgument(SDValue Chain, SDValue InFlag, bool ChainLink, + const SmallVectorImpl *Ins, + const SmallVectorImpl *Outs, SDLoc dl, + SelectionDAG &DAG, SmallVectorImpl *InVals, + unsigned &ArgNo, Type *type, unsigned AS, + const char *ParamName, SDValue ParamPtr, + const SmallVectorImpl *OutVals = nullptr, + bool isRetArgLoad = false, + const AAMDNodes & = AAMDNodes(), + uint64_t offset = 0) const; + +public: + SDValue getArgLoad(SelectionDAG &DAG, SDLoc SL, EVT ArgVT, Type *Ty, + bool isSExt, unsigned AddressSpace, SDValue Chain, + SDValue Ptr, SDValue InFlag, unsigned index, + bool IsRetArgLoad = false, uint64_t Offset = 0) const; + + SDValue getArgStore(SelectionDAG &DAG, SDLoc SL, EVT ArgVT, Type *Ty, + unsigned AddressSpace, SDValue Chain, SDValue Ptr, + SDValue Value, unsigned Index, SDValue InFlag, + const AAMDNodes &AAInfo = AAMDNodes(), + uint64_t Offset = 0) const; + + //===--------------------------------------------------------------------===// + // Lowering methods - These methods must be implemented by targets so that + // the SelectionDAGLowering code knows how to lower these. 
+ + SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, + const SmallVectorImpl &ArgInfo, + SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, + MachineFrameInfo *MFI, unsigned i) const; + + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, SDLoc dl, + SelectionDAG &DAG) const override; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + const char *getTargetNodeName(unsigned Opcode) const override; + + /// Custom lowering methods + SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerLdKernargIntrinsic(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + + SDValue lowerSamplerInitializerOperand(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerROTR(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + + //===--------------------------------------------------------------------===// + // Instruction Emitting Hooks + // + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + + bool isZExtFree(Type *Ty1, Type *Ty2) const override; + + bool isZExtFree(EVT VT1, EVT VT2) const override; + + bool isFAbsFree(EVT VT) const override; + bool isFNegFree(EVT VT) const override; + + bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; + + bool isLegalICmpImmediate(int64_t Imm) const override; + + MVT getScalarShiftAmountTy(EVT LHSTy) const override; + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; + + void AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const override; +}; + +namespace HSAILISD { +// HSAIL Specific DAG Nodes +enum { + // Start the numbering where the builtin ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + CALL, // Function call based on a single integer + RET, + SMAX, + UMAX, + SMIN, + UMIN, + FRACT, + NFMA, + UMAD, + SMAD, + UMUL24, + SMUL24, + UMAD24, + SMAD24, + BITSELECT, + SBITEXTRACT, + UBITEXTRACT, + FLDEXP, + CLASS, + LDA, + ACTIVELANEPERMUTE, + ACTIVELANEID, + ACTIVELANECOUNT, + ACTIVELANEMASK, + KERNARGBASEPTR, + SEGMENTP, + + FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, + + // Load and store of arguments. Main purpose is to add glue to what would + // be a generic load / store. 
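+  // These are selected in HSAILISelDAGToDAG via SelectArgLd / SelectArgSt.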
+ ARG_LD, + ARG_ST +}; +} +} // end llvm namespace + +#endif // _HSAIL_LOWERING_H_ Index: lib/Target/HSAIL/HSAILISelLowering.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILISelLowering.cpp @@ -0,0 +1,1917 @@ +//===-- HSAILISelLowering.cpp - HSAIL DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that HSAIL uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hsail-isel" +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILInstrInfo.h" +#include "HSAILISelLowering.h" +#include "HSAILMachineFunctionInfo.h" +#include "HSAILSubtarget.h" +#include "HSAILTargetMachine.h" +#include "HSAILUtilityFunctions.h" +#include "HSAILOpaqueTypes.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/raw_ostream.h" + +#include "HSAILGenInstrInfo.inc" + +using namespace llvm; +using namespace dwarf; + +HSAILTargetLowering::HSAILTargetLowering(HSAILTargetMachine &TM, + const HSAILSubtarget &ST) + : TargetLowering(TM), + Subtarget(&ST) { + // HSAIL uses a -1 to store a Boolean value as an int. For example, + // see the return values of the cmp instructions. This also requires + // that we never use a cvt instruction for converting a Boolean to a + // larger integer, because HSAIL cvt uses a zext when the source is + // b1. Due to the setting below, LLVM will ensure that all such + // conversions are done with the sext instruction. + setBooleanContents(ZeroOrNegativeOneBooleanContent); + + RegInfo = Subtarget->getRegisterInfo(); + DL = getDataLayout(); + + // Set up the register classes. 
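+  // HSAIL registers are typeless bit containers: 32-bit and 64-bit values
+  // share the $s and $d register files whether they hold integer or float
+  // data, and i1 values live in the $c control registers.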
+ addRegisterClass(MVT::i32, &HSAIL::GPR32RegClass); + addRegisterClass(MVT::i64, &HSAIL::GPR64RegClass); + addRegisterClass(MVT::f32, &HSAIL::GPR32RegClass); + addRegisterClass(MVT::f64, &HSAIL::GPR64RegClass); + addRegisterClass(MVT::i1, &HSAIL::CRRegClass); + + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); + + setOperationAction(ISD::BSWAP, MVT::i32, Custom); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + setOperationAction(ISD::ADD, MVT::i1, Custom); + setOperationAction(ISD::ROTL, MVT::i1, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Custom); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i1, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Custom); + setOperationAction(ISD::ROTR, MVT::i64, Expand); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); + + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); + + setOperationAction(ISD::SELECT, MVT::f64, Promote); + AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64); + + setOperationAction(ISD::SELECT, MVT::f32, Promote); + AddPromotedToType(ISD::SELECT, MVT::f32, MVT::i32); + + setOperationAction(ISD::GlobalAddress, MVT::i32, Legal); + setOperationAction(ISD::GlobalAddress, MVT::i64, Legal); + + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::Constant, MVT::i32, Legal); + setOperationAction(ISD::Constant, MVT::i64, Legal); + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + + for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + + for (MVT VT : MVT::fp_vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Expand); + } + + for 
(MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom); + } + + for (MVT VT : MVT::integer_vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v1i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v16i32, Expand); + + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v1i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v16i16, Expand); + + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v16i8, Expand); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v1i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v8i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v16i16, Expand); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v8i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v16i8, Expand); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v1i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v8i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v16i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v1i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v1i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i16, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i8, Expand); + } + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand); + setTruncStoreAction(MVT::i64, MVT::i32, Expand); + setTruncStoreAction(MVT::i64, MVT::v1i32, Expand); + 
setTruncStoreAction(MVT::i64, MVT::v2i32, Expand); + setTruncStoreAction(MVT::i64, MVT::v4i32, Expand); + setTruncStoreAction(MVT::i64, MVT::v8i32, Expand); + setTruncStoreAction(MVT::i64, MVT::v16i32, Expand); + setTruncStoreAction(MVT::i64, MVT::i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v1i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v2i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v4i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v8i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v16i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i8, Expand); + setTruncStoreAction(MVT::i64, MVT::v2i8, Expand); + setTruncStoreAction(MVT::i64, MVT::v4i8, Expand); + setTruncStoreAction(MVT::i64, MVT::v8i8, Expand); + setTruncStoreAction(MVT::i64, MVT::v16i8, Expand); + + setOperationAction(ISD::STORE, MVT::i1, Custom); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom); + + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + + setHasMultipleConditionRegisters(true); + setJumpIsExpensive(true); + setSelectIsExpensive(true); + setPow2SDivIsCheap(false); + setPrefLoopAlignment(0); + setSchedulingPreference((CodeGenOpt::None == TM.getOptLevel()) ? Sched::Source + : Sched::ILP); +#ifdef _DEBUG + const char *pScheduler = std::getenv("AMD_DEBUG_HSAIL_PRE_RA_SCHEDULER"); + if (pScheduler) { + if (strcmp(pScheduler, "ilp") == 0) { + printf("Overriding pre-RA scheduler with %s\n", pScheduler); + setSchedulingPreference(Sched::ILP); + } else if (strcmp(pScheduler, "regpressure") == 0) { + printf("Overriding pre-RA scheduler with %s\n", pScheduler); + setSchedulingPreference(Sched::RegPressure); + } else if (strcmp(pScheduler, "hybrid") == 0) { + printf("Overriding pre-RA scheduler with %s\n", pScheduler); + setSchedulingPreference(Sched::Hybrid); + } + } +#endif + + computeRegisterProperties(ST.getRegisterInfo()); + + MaxStoresPerMemcpy = 4096; + MaxStoresPerMemmove = 4096; + MaxStoresPerMemset = 4096; +} + +HSAILTargetLowering::~HSAILTargetLowering() {} + +EVT HSAILTargetLowering::getSetCCResultType(LLVMContext &Context, + EVT VT) const { + return MVT::i1; +} + +const TargetRegisterClass * +HSAILTargetLowering::getRepRegClassFor(MVT VT) const { + switch (VT.SimpleTy) { + case MVT::i64: + case MVT::f64: + return &HSAIL::GPR64RegClass; + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::f32: + return &HSAIL::GPR32RegClass; + case MVT::i1: + return &HSAIL::CRRegClass; + default: + llvm_unreachable("Cannot find register class for value type"); + break; + } + return nullptr; +} + +uint8_t HSAILTargetLowering::getRepRegClassCostFor(MVT VT) const { + return 1; +} + +bool HSAILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + // All floating point types are legal for 32bit and 64bit types. 
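+  // Returning true here means f32/f64 immediates are kept as inline operands
+  // instead of being legalized into constant-pool loads.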
+ return (VT == EVT(MVT::f32) || VT == EVT(MVT::f64)); +} + +bool HSAILTargetLowering::allowsMisalignedMemoryAccesses(EVT, + unsigned AddrSpace, + unsigned Align, + bool *Fast) const { + return true; +} + +unsigned HSAILTargetLowering::getJumpTableEncoding() const { + return MachineJumpTableInfo::EK_BlockAddress; +} + +bool HSAILTargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + return true; +} + +unsigned HSAILTargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, const SelectionDAG &DAG, unsigned Depth) const { + return 1; +} + +static SDValue PerformBitalignCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + unsigned IID) { + assert(IID == HSAILIntrinsic::HSAIL_bitalign_b32 || + IID == HSAILIntrinsic::HSAIL_bytealign_b32); + SDValue Opr0 = N->getOperand(1); + SDValue Opr1 = N->getOperand(2); + SDValue Opr2 = N->getOperand(3); + ConstantSDNode *SHR = dyn_cast(Opr2); + SelectionDAG &DAG = DCI.DAG; + SDLoc dl = SDLoc(N); + EVT VT = N->getValueType(0); + // fold bitalign_b32(x & c1, x & c1, c2) -> bitalign_b32(x, x, c2) & rotr(c1, + // c2) + if (SHR && (Opr0 == Opr1) && (Opr0.getOpcode() == ISD::AND)) { + if (ConstantSDNode *AndMask = + dyn_cast(Opr0.getOperand(1))) { + uint64_t and_mask = AndMask->getZExtValue(); + uint64_t shr_val = SHR->getZExtValue() & 31U; + if (IID == HSAILIntrinsic::HSAIL_bytealign_b32) + shr_val = (shr_val & 3U) << 3U; + and_mask = + ((and_mask >> shr_val) | (and_mask << (32U - shr_val))) & 0xffffffffu; + Opr0 = Opr0->getOperand(0); + return DAG.getNode(ISD::AND, SDLoc(Opr1), Opr1.getValueType(), + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(IID, dl, MVT::i32), + Opr0, Opr0, Opr2), + DAG.getConstant(and_mask, dl, MVT::i32)); + } + } + // fold bitalign_b32(x, y, c) -> bytealign_b32(x, y, c/8) if c & 7 == 0 + if (SHR && (IID == HSAILIntrinsic::HSAIL_bitalign_b32)) { + uint64_t shr_val = SHR->getZExtValue() & 31U; + if ((shr_val & 7U) == 0) + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(HSAILIntrinsic::HSAIL_bytealign_b32, dl, MVT::i32), + Opr0, Opr1, DAG.getConstant(shr_val >> 3U, dl, MVT::i32)); + } + return SDValue(); +} + +static SDValue +PerformIntrinsic_Wo_ChainCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN); + unsigned IID = cast(N->getOperand(0))->getZExtValue(); + switch (IID) { + case HSAILIntrinsic::HSAIL_bitalign_b32: // fall-through + case HSAILIntrinsic::HSAIL_bytealign_b32: + return PerformBitalignCombine(N, DCI, IID); + } + return SDValue(); +} + +SDValue HSAILTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + case ISD::INTRINSIC_WO_CHAIN: + return PerformIntrinsic_Wo_ChainCombine(N, DCI); + default: + break; + } + + return SDValue(); +} + +bool HSAILTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, + EVT VT) const { + return (Opc == ISD::LOAD || Opc == ISD::STORE) && + (VT.getSimpleVT() == MVT::f32 || VT.getSimpleVT() == MVT::f64); +} + +//===--------------------------------------------------------------------===// + +/// n-th element of a vector has different alignment than a base. +/// This function returns alignment for n-th alement. + +// FIXME: It is probably not correct to use this. 
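+// For example, for a <4 x i32> argument with 16-byte ABI alignment the
+// elements live at offsets 0/4/8/12, so this returns 16, 4, 8 and 4 for
+// n = 0..3.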
+static unsigned getElementAlignment(const DataLayout *DL, Type *Ty, + unsigned n) { + if (Ty->isArrayTy()) // FIXME + return getElementAlignment(DL, Ty->getArrayElementType(), 0); + + unsigned Alignment = DL->getABITypeAlignment(Ty); + if (n && (Alignment > 1)) { + Type *EltTy = Ty->getScalarType(); + unsigned ffs = 0; + while (((n >> ffs) & 1) == 0) + ffs++; + Alignment = (DL->getABITypeAlignment(EltTy) * (1 << ffs)) & (Alignment - 1); + } else { + if (OpaqueType OT = GetOpaqueType(Ty)) { + if (IsImage(OT) || OT == Sampler) + Alignment = 8; + } + } + return Alignment; +} + +SDValue +HSAILTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + SDLoc dl, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + HSAILMachineFunctionInfo *FuncInfo = MF.getInfo(); + HSAILParamManager &PM = FuncInfo->getParamManager(); + const Function *F = MF.getFunction(); + const FunctionType *funcType = F->getFunctionType(); + + SmallVector RetOps; + RetOps.push_back(Chain); + + Type *type = funcType->getReturnType(); + if (!type->isVoidTy()) { + Mangler Mang(getDataLayout()); + + // FIXME: The ParamManager here is only used for making sure the built + // string's name survives until code emission. We can't rely on the name + // here being added because unreachable functions with return values may not + // have return instructions. + const char *SymName = PM.getParamName( + PM.addReturnParam(type, PM.mangleArg(&Mang, F->getName()))); + + MVT ArgPtrVT = getPointerTy(HSAILAS::ARG_ADDRESS); + SDValue RetVariable = DAG.getTargetExternalSymbol(SymName, ArgPtrVT); + + AAMDNodes MD; // FIXME: What is this for? + // Value *mdops[] = { const_cast(F) }; + // MDNode *MD = MDNode::get(F->getContext(), mdops); + + unsigned ArgNo = 0; + LowerArgument(Chain, SDValue(), false, nullptr, &Outs, dl, DAG, &RetOps, ArgNo, + type, HSAILAS::ARG_ADDRESS, nullptr, RetVariable, &OutVals, + false, MD); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetOps); + } + + return DAG.getNode(HSAILISD::RET, dl, MVT::Other, Chain); +} + +/// Create kernel or function parameter scalar load and return its +/// value. AddressSpace used to determine if that is a kernel or function +/// argument. ArgVT specifies expected value type where 'Ty' refers to the real +/// argument type from function's signature. +/// +/// If the call sequence is not glued we may have unrelated to call instructions +/// scheduled into the argscope if intent was argscope use. This function +/// inserts a load or store argument instruction with glue. If InFlag contains +/// glue it is used for inbound glue. Glue is produced as a last result and can +/// be consumed at will of the caller. Offset operand is added to the offset +/// value calculated from index. +SDValue HSAILTargetLowering::getArgLoad(SelectionDAG &DAG, SDLoc SL, EVT ArgVT, + Type *Ty, bool isSExt, + unsigned AddressSpace, SDValue Chain, + SDValue Ptr, SDValue InFlag, + unsigned Index, bool IsRetArgLoad, + uint64_t Offset) const { + Type *EltTy = Ty; + + if (Ty->isArrayTy()) + EltTy = Ty->getArrayElementType(); + EltTy = EltTy->getScalarType(); + + MVT PtrVT = getPointerTy(AddressSpace); + PointerType *ArgPT = PointerType::get(EltTy, AddressSpace); + + // TODO_HSA: check if that works with packed structs, it can happen + // we would need to inhibit alignment calculation in that case. 
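+  // The per-element byte offset is the element index scaled by the element's
+  // store size; Index selects which element of a vector or array argument is
+  // being accessed.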
+ Offset += DL->getTypeStoreSize(EltTy) * Index; + + EVT MemVT = ArgVT; + if (ArgVT == MVT::i1) + MemVT = MVT::i8; + + if (!Ptr && AddressSpace == HSAILAS::KERNARG_ADDRESS) { + // If the argument symbol is unknown, generate a kernargbaseptr instruction. + Ptr = DAG.getNode(HSAILISD::KERNARGBASEPTR, SL, PtrVT); + } + + unsigned Align = getElementAlignment(DL, Ty, Index); + unsigned Width = BRIG_WIDTH_1; + + // TODO_HSA: Due to problems with RT alignment of vectors we have to + // use element size instead of vector size for alignment. + // Fix when RT is fixed. + if (AddressSpace == HSAILAS::KERNARG_ADDRESS) { + Align = DL->getABITypeAlignment(EltTy); + Width = BRIG_WIDTH_ALL; + } + + SDValue PtrOffs = + DAG.getNode(ISD::ADD, SL, PtrVT, Ptr, DAG.getConstant(Offset, SL, PtrVT)); + + const SDValue Ops[] = { + Chain, + PtrOffs, + DAG.getTargetConstant(Width, SL, MVT::i32), + DAG.getTargetConstant(IsRetArgLoad, SL, MVT::i1), + DAG.getTargetConstant(isSExt, SL, MVT::i1), + InFlag + }; + + ArrayRef OpsArr = makeArrayRef(Ops); + if (!InFlag) + OpsArr = OpsArr.drop_back(1); + + EVT VT = (MemVT.getStoreSize() < 4) ? MVT::i32 : ArgVT; + SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue); + + MachinePointerInfo PtrInfo(UndefValue::get(ArgPT), Offset); + + SDValue Arg = DAG.getMemIntrinsicNode(HSAILISD::ARG_LD, SL, VTs, OpsArr, + MemVT, PtrInfo, Align, + false, // isVolatile + true, // ReadMem + false, // WriteMem + MemVT.getStoreSize()); // Size + + if (ArgVT == MVT::i1) { + const SDValue Ops[] = {DAG.getNode(ISD::TRUNCATE, SL, MVT::i1, Arg), + Arg.getValue(1), Arg.getValue(2)}; + + return DAG.getMergeValues(Ops, SL); + } + + return Arg; +} + +SDValue HSAILTargetLowering::getArgStore( + SelectionDAG &DAG, SDLoc SL, EVT ArgVT, Type *Ty, unsigned AddressSpace, + SDValue Chain, SDValue Ptr, SDValue Value, unsigned Index, SDValue InFlag, + const AAMDNodes &AAInfo, uint64_t Offset) const { + + Type *EltTy = Ty; + if (Ty->isArrayTy()) + EltTy = Ty->getArrayElementType(); + EltTy = EltTy->getScalarType(); + MVT PtrVT = getPointerTy(AddressSpace); + PointerType *ArgPT = PointerType::get(EltTy, AddressSpace); + // TODO_HSA: check if that works with packed structs, it can happen + // we would need to inhibit alignment calculation in that case. + Offset += DL->getTypeStoreSize(EltTy) * Index; + + EVT MemVT = ArgVT; + + if (ArgVT == MVT::i1) { + MemVT = MVT::i8; + Value = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Value); + } + + SDValue PtrOffs = + DAG.getNode(ISD::ADD, SL, PtrVT, Ptr, DAG.getConstant(Offset, SL, PtrVT)); + + unsigned Align = getElementAlignment(DL, Ty, Index); + // TODO_HSA: Due to problems with RT alignment of vectors we have to + // use element size instead of vector size for alignment. + // Fix when RT is fixed. + if (AddressSpace == HSAILAS::KERNARG_ADDRESS) + Align = DL->getABITypeAlignment(EltTy); + + SDValue Ops[] = {Chain, Value, PtrOffs, InFlag}; + + ArrayRef OpsArr = makeArrayRef(Ops); + if (!InFlag) + OpsArr = OpsArr.drop_back(1); + + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + MachinePointerInfo PtrInfo(UndefValue::get(ArgPT), Offset); + + return DAG.getMemIntrinsicNode(HSAILISD::ARG_ST, SL, VTs, OpsArr, MemVT, + PtrInfo, Align, + false, // isVolatile + false, // ReadMem + true, // WriteMem + MemVT.getStoreSize()); +} + +/// Recursively lower a single argument or its element. +/// Either Ins or Outs must non-zero, which means we are doing argument load +/// or store. +/// ArgNo is an index to InVals and OutVals, which is advanced after the call. 
+/// AS is the address space of the argument, either arg or kernarg.
+/// ParamPtr is the pointer value for the argument to load from or store to.
+/// Offset is a value which has to be added to the pointer.
+/// If InFlag is present, glue all operations.
+/// If ChainLink is true, chain link all operations.
+/// Returns the last operation value.
+SDValue HSAILTargetLowering::LowerArgument(
+    SDValue Chain, SDValue InFlag, bool ChainLink,
+    const SmallVectorImpl<ISD::InputArg> *Ins,
+    const SmallVectorImpl<ISD::OutputArg> *Outs, SDLoc dl, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> *InVals, unsigned &ArgNo, Type *type, unsigned AS,
+    const char *ParamName, SDValue ParamPtr,
+    const SmallVectorImpl<SDValue> *OutVals, bool isRetArgLoad,
+    const AAMDNodes &AAInfo, uint64_t offset) const {
+  assert((Ins == nullptr && Outs != nullptr) ||
+         (Ins != nullptr && Outs == nullptr));
+
+  Type *sType = type->getScalarType();
+
+  EVT argVT = Ins ? (*Ins)[ArgNo].VT : (*Outs)[ArgNo].VT;
+
+  if (sType->isIntegerTy(8))
+    argVT = MVT::i8;
+  else if (sType->isIntegerTy(16))
+    argVT = MVT::i16;
+
+  bool isLoad = Ins != nullptr;
+  bool hasFlag = InFlag.getNode() != nullptr;
+  SDValue ArgValue;
+
+  const VectorType *VecTy = dyn_cast<VectorType>(type);
+  const ArrayType *ArrTy = dyn_cast<ArrayType>(type);
+  if (VecTy || ArrTy) {
+    // This assumes that char and short vector elements are unpacked in Ins.
+    unsigned num_elem =
+        VecTy ? VecTy->getNumElements() : ArrTy->getNumElements();
+    for (unsigned i = 0; i < num_elem; ++i) {
+      if (isLoad) {
+        bool IsSExt = (*Ins)[ArgNo].Flags.isSExt();
+        ArgValue = getArgLoad(DAG, dl, argVT, type, IsSExt, AS, Chain, ParamPtr,
+                              InFlag, i, isRetArgLoad, offset);
+      } else {
+        ArgValue = getArgStore(DAG, dl, argVT, type, AS, Chain, ParamPtr,
+                               (*OutVals)[ArgNo], i, InFlag, AAInfo, offset);
+      }
+
+      if (ChainLink)
+        Chain = ArgValue.getValue(isLoad ? 1 : 0);
+
+      // Glue next vector loads regardless of input flag to favor vectorization.
+      InFlag = ArgValue.getValue(isLoad ? 2 : 1);
+      if (InVals)
+        InVals->push_back(ArgValue);
+      ArgNo++;
+    }
+
+    return ArgValue;
+  }
+
+  if (StructType *STy = dyn_cast<StructType>(type)) {
+    const StructLayout *SL = DL->getStructLayout(STy);
+    unsigned num_elem = STy->getNumElements();
+    for (unsigned i = 0; i < num_elem; ++i) {
+      ArgValue = LowerArgument(Chain, InFlag, ChainLink, Ins, Outs, dl, DAG,
+                               InVals, ArgNo, STy->getElementType(i), AS,
+                               ParamName, ParamPtr, OutVals, isRetArgLoad,
+                               AAInfo, offset + SL->getElementOffset(i));
+      if (ChainLink)
+        Chain = ArgValue.getValue(isLoad ? 1 : 0);
+
+      if (hasFlag)
+        InFlag = ArgValue.getValue(isLoad ? 2 : 1);
+    }
+    return ArgValue;
+  }
+
+  // Regular scalar load case.
+  if (isLoad) {
+    bool IsSExt = (*Ins)[ArgNo].Flags.isSExt();
+    ArgValue = getArgLoad(DAG, dl, argVT, type, IsSExt, AS, Chain, ParamPtr,
+                          InFlag, 0, isRetArgLoad, offset);
+
+  } else {
+    ArgValue = getArgStore(DAG, dl, argVT, type, AS, Chain, ParamPtr,
+                           (*OutVals)[ArgNo], 0, InFlag, AAInfo, offset);
+  }
+
+  if (InVals)
+    InVals->push_back(ArgValue);
+  ArgNo++;
+
+  return ArgValue;
+}
+
+SDValue HSAILTargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  HSAILMachineFunctionInfo *FuncInfo = MF.getInfo<HSAILMachineFunctionInfo>();
+  HSAILParamManager &PM = FuncInfo->getParamManager();
+  unsigned AS = HSAIL::isKernelFunc(MF.getFunction()) ? HSAILAS::KERNARG_ADDRESS
+                                                      : HSAILAS::ARG_ADDRESS;
+  MVT PtrTy = getPointerTy(AS);
+
+  Mangler Mang(DL);
+
+  // Map function param types to Ins.
+  Function::const_arg_iterator AI = MF.getFunction()->arg_begin();
+  Function::const_arg_iterator AE = MF.getFunction()->arg_end();
+  for (unsigned ArgNo = 0; AI != AE; ++AI) {
+    unsigned Param = PM.addArgumentParam(
+        AS, *AI, HSAILParamManager::mangleArg(&Mang, AI->getName()));
+    const char *ParamName = PM.getParamName(Param);
+    std::string md = (AI->getName() + ":" + ParamName + " ").str();
+    FuncInfo->addMetadata("argmap:" + md, true);
+    SDValue ParamPtr = DAG.getTargetExternalSymbol(ParamName, PtrTy);
+
+    // FIXME: What is this for?
+    // Value *mdops[] = { const_cast(&(*AI)) };
+    // MDNode *ArgMD = MDNode::get(MF.getFunction()->getContext(), mdops);
+
+    LowerArgument(Chain, SDValue(), false, &Ins, nullptr, dl, DAG, &InVals,
+                  ArgNo, AI->getType(), AS, ParamName, ParamPtr, nullptr);
+  }
+
+  return Chain;
+}
+
+static BrigType getParamBrigType(Type *Ty, const DataLayout &DL, bool IsSExt) {
+  BrigType BT = HSAIL::getBrigType(Ty, DL, IsSExt);
+  if (BT == BRIG_TYPE_B1) {
+    assert(!IsSExt && "When does this happen?");
+    return IsSExt ? BRIG_TYPE_S8 : BRIG_TYPE_U8;
+  }
+
+  return BT;
+}
+
+SDValue HSAILTargetLowering::LowerCall(CallLoweringInfo &CLI,
+                                       SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc &dl = CLI.DL;
+  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  CLI.IsTailCall = false;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  HSAILParamManager &PM =
+      MF.getInfo<HSAILMachineFunctionInfo>()->getParamManager();
+  Mangler Mang(DL);
+
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl);
+  SDValue InFlag = Chain.getValue(1);
+
+  const FunctionType *funcType = nullptr;
+  const Function *calleeFunc = nullptr;
+  const char *FuncName = nullptr;
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+  // every direct call is) turn it into a TargetGlobalAddress/
+  // TargetExternalSymbol node so that legalize doesn't hack it.
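+  // Indirect calls are not supported: any callee that is not a global
+  // address hits the llvm_unreachable below.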
+ if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + unsigned AS = G->getAddressSpace(); + const GlobalValue *GV = G->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(AS)); + + if (const GlobalAlias *GA = dyn_cast(GV)) + calleeFunc = cast(GA->getAliasee()); + else + calleeFunc = cast(GV); + + funcType = calleeFunc->getFunctionType(); + FuncName = GV->getName().data(); + } else + llvm_unreachable( + "Cannot lower call to a function which is not a global address"); + + assert(funcType != nullptr); + + SmallVector Ops; + SmallVector VarOps; + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + + Type *retType = funcType->getReturnType(); + SDValue RetValue; + if (!retType->isVoidTy()) { + MVT PtrVT = getPointerTy(HSAILAS::ARG_ADDRESS); + RetValue = DAG.getTargetExternalSymbol( + PM.getParamName( + PM.addCallRetParam(retType, PM.mangleArg(&Mang, FuncName))), + PtrVT); + + unsigned NElts; + Type *EmitTy = HSAIL::analyzeType(retType, NElts, *DL); + + BrigType BT = getParamBrigType(EmitTy, *DL, CLI.RetSExt); + + unsigned Align = HSAIL::getAlignTypeQualifier(retType, *DL, false); + + const SDValue ArgDeclOps[] = { + RetValue, + DAG.getTargetConstant(BT, dl, MVT::i32), + DAG.getTargetConstant(NElts, dl, PtrVT), + DAG.getTargetConstant(Align, dl, MVT::i32), + Chain, + InFlag + }; + + SDNode *ArgDeclNode = + DAG.getMachineNode(HSAIL::ARG_DECL, dl, VTs, ArgDeclOps); + + SDValue ArgDecl(ArgDeclNode, 0); + + Chain = SDValue(ArgDeclNode, 0); + InFlag = Chain.getValue(1); + + VarOps.push_back(RetValue); + } + + // Delimit return value and parameters with 0 + VarOps.push_back(DAG.getTargetConstant(0, dl, MVT::i32)); + unsigned FirstArg = VarOps.size(); + + unsigned int j = 0, k = 0; + Function::const_arg_iterator ai; + Function::const_arg_iterator ae; + if (calleeFunc) { + ai = calleeFunc->arg_begin(); + ae = calleeFunc->arg_end(); + } + + MVT ArgPtrVT = getPointerTy(HSAILAS::ARG_ADDRESS); + + MDBuilder MDB(*DAG.getContext()); + for (FunctionType::param_iterator pb = funcType->param_begin(), + pe = funcType->param_end(); + pb != pe; ++pb, ++ai, ++k) { + Type *type = *pb; + + std::string ParamName; + if (calleeFunc && ai != ae) { + ParamName = PM.mangleArg(&Mang, ai->getName()); + } + if (ParamName.empty()) { + ParamName = "__param_p"; + ParamName.append(itostr(k)); + } + SDValue StParamValue = DAG.getTargetExternalSymbol( + PM.getParamName(PM.addCallArgParam(type, ParamName)), ArgPtrVT); + + unsigned NElts; + Type *EmitTy = HSAIL::analyzeType(type, NElts, *DL); + + // START array parameter declaration + BrigType BT = getParamBrigType(EmitTy, *DL, Outs[j].Flags.isSExt()); + + unsigned Align = HSAIL::getAlignTypeQualifier(type, *DL, false); + const SDValue ArgDeclOps[] = { + StParamValue, + DAG.getTargetConstant(BT, dl, MVT::i32), + DAG.getTargetConstant(NElts, dl, ArgPtrVT), + DAG.getTargetConstant(Align, dl, MVT::i32), + Chain, + InFlag + }; + + SDNode *ArgDeclNode = + DAG.getMachineNode(HSAIL::ARG_DECL, dl, VTs, ArgDeclOps); + Chain = SDValue(ArgDeclNode, 0); + InFlag = Chain.getValue(1); + + // END array parameter declaration + VarOps.push_back(StParamValue); + + for (; j < Outs.size() - 1; j++) { + if (Outs[j].OrigArgIndex != Outs[j + 1].OrigArgIndex) + break; + } + j++; + } + + j = k = 0; + for (FunctionType::param_iterator pb = funcType->param_begin(), + pe = funcType->param_end(); + pb != pe; ++pb, ++k) { + Type *type = *pb; + Chain = LowerArgument(Chain, InFlag, true, nullptr, &Outs, dl, DAG, nullptr, j, + type, HSAILAS::ARG_ADDRESS, nullptr, + VarOps[FirstArg + k], 
                               &OutVals);
+    InFlag = Chain.getValue(1);
+  }
+
+  // If this is a direct call, pass the callee.
+  if (Callee.getNode()) {
+    Ops.push_back(Callee);
+  }
+
+  // Add actual arguments to the end of the list
+  for (unsigned int i = 0, e = VarOps.size(); i != e; ++i) {
+    Ops.push_back(VarOps[i]);
+  }
+
+  Ops.push_back(Chain);
+  Ops.push_back(InFlag);
+
+  SDNode *Call = DAG.getMachineNode(HSAIL::CALL, dl, VTs, Ops);
+  Chain = SDValue(Call, 0);
+
+  InFlag = Chain.getValue(1);
+
+  // Read return value.
+  if (!Ins.empty()) {
+    j = 0;
+    Chain = LowerArgument(Chain, InFlag, true, &Ins, nullptr, dl, DAG, &InVals,
+                          j, retType, HSAILAS::ARG_ADDRESS, nullptr, RetValue,
+                          nullptr, true);
+    InFlag = Chain.getValue(2);
+    Chain = Chain.getValue(1);
+  }
+
+  // Create the CALLSEQ_END node
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
+                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
+  return Chain;
+}
+
+#define LOWER(A)                                                               \
+  case ISD::A:                                                                 \
+    return Lower##A(Op, DAG)
+
+SDValue HSAILTargetLowering::LowerOperation(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+    LOWER(INTRINSIC_WO_CHAIN);
+    LOWER(INTRINSIC_W_CHAIN);
+    LOWER(ROTL);
+    LOWER(ROTR);
+    LOWER(BSWAP);
+    LOWER(ADD);
+    LOWER(LOAD);
+    LOWER(STORE);
+    LOWER(ATOMIC_LOAD);
+    LOWER(ATOMIC_STORE);
+    break;
+  default:
+    Op.getNode()->dump();
+    assert(0 && "Custom lowering code for this "
+                "instruction is not implemented yet!");
+    break;
+  }
+  return Op;
+}
+
+/// ReplaceNodeResults - This callback is invoked when a node result type is
+/// illegal for the target, and the operation was registered to use 'custom'
+/// lowering for that result type. The target places new result values for
+/// the node in Results (their number and types must exactly match those of
+/// the original return values of the node), or leaves Results empty, which
+/// indicates that the node is not to be custom lowered after all.
+///
+/// If the target has no operations that require custom lowering, it need not
+/// implement this. The default implementation aborts.
+void HSAILTargetLowering::ReplaceNodeResults(SDNode *N,
+                                             SmallVectorImpl<SDValue> &Results,
+                                             SelectionDAG &DAG) const {
+  return TargetLowering::ReplaceNodeResults(N, Results, DAG);
+}
+
+/// getTargetNodeName() - This method returns the name of a target specific
+/// DAG node.
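+/// The names are only used when dumping or debugging the selection DAG.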
+const char *HSAILTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: + llvm_unreachable("Unknown target-node"); + return nullptr; + case HSAILISD::CALL: + return "HSAILISD::CALL"; + case HSAILISD::RET: + return "HSAILISD::RET"; + case HSAILISD::SMAX: + return "HSAILISD::SMAX"; + case HSAILISD::UMAX: + return "HSAILISD::UMAX"; + case HSAILISD::SMIN: + return "HSAILISD::SMIN"; + case HSAILISD::UMIN: + return "HSAILISD::UMIN"; + case HSAILISD::FRACT: + return "HSAILISD::FRACT"; + case HSAILISD::NFMA: + return "HSAILISD::NFMA"; + case HSAILISD::UMAD: + return "HSAILISD::UMAD"; + case HSAILISD::SMAD: + return "HSAILISD::SMAD"; + case HSAILISD::UMUL24: + return "HSAILISD::UMUL24"; + case HSAILISD::SMUL24: + return "HSAILISD::SMUL24"; + case HSAILISD::UMAD24: + return "HSAILISD::UMAD24"; + case HSAILISD::SMAD24: + return "HSAILISD::SMAD24"; + case HSAILISD::BITSELECT: + return "HSAILISD::BITSELECT"; + case HSAILISD::SBITEXTRACT: + return "HSAILISD::SBITEXTRACT"; + case HSAILISD::UBITEXTRACT: + return "HSAILISD::UBITEXTRACT"; + case HSAILISD::FLDEXP: + return "HSAILISD::FLDEXP"; + case HSAILISD::CLASS: + return "HSAILISD::CLASS"; + case HSAILISD::LDA: + return "HSAILISD::LDA"; + case HSAILISD::ACTIVELANEPERMUTE: + return "HSAILISD::ACTIVELANEPERMUTE"; + case HSAILISD::ACTIVELANEID: + return "HSAILISD::ACTIVELANEID"; + case HSAILISD::ACTIVELANECOUNT: + return "HSAILISD::ACTIVELANECOUNT"; + case HSAILISD::ACTIVELANEMASK: + return "HSAILISD::ACTIVELANEMASK"; + case HSAILISD::KERNARGBASEPTR: + return "HSAILISD::KERNARGBASEPTR"; + case HSAILISD::SEGMENTP: + return "HSAILISD::SEGMENTP"; + case HSAILISD::ARG_LD: + return "HSAILISD::ARG_LD"; + case HSAILISD::ARG_ST: + return "HSAILISD::ARG_ST"; + } +} + +//===--------------------------------------------------------------------===// +// Custom lowering methods +// + +SDValue HSAILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl = SDLoc(Op); + EVT VT = Op.getValueType(); + + if (VT != MVT::i1) { + return Op; + } + const SDValue src = Op.getOperand(0).getOperand(0); + EVT srcVT = src.getValueType(); + if (Op.getOperand(0).getOpcode() != ISD::TRUNCATE) + return Op; + + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Op.getOperand(1)); + SDValue Zext1 = DAG.getNode(ISD::ZERO_EXTEND, dl, srcVT, Op.getOperand(0)); + SDValue add_p = DAG.getNode(ISD::ADD, dl, srcVT, Zext1, Zext); + SDValue Zext2 = DAG.getNode(ISD::TRUNCATE, dl, VT, add_p); + return Zext2; +} + +static bool isRdimage(unsigned IntNo) { + switch (IntNo) { + case HSAILIntrinsic::HSAIL_rd_imgf_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_1d_f32: + // read image 1d array + case HSAILIntrinsic::HSAIL_rd_imgf_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_1da_f32: + // read image 2d + case HSAILIntrinsic::HSAIL_rd_imgf_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_2d_f32: + // read image 2d array + case HSAILIntrinsic::HSAIL_rd_imgf_2da_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2da_f32: + case 
 HSAILIntrinsic::HSAIL_rd_imgi_2da_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgi_2da_f32:
+  case HSAILIntrinsic::HSAIL_rd_imgui_2da_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgui_2da_f32:
+  // read image 3d
+  case HSAILIntrinsic::HSAIL_rd_imgf_3d_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgf_3d_f32:
+  case HSAILIntrinsic::HSAIL_rd_imgi_3d_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgi_3d_f32:
+  case HSAILIntrinsic::HSAIL_rd_imgui_3d_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgui_3d_f32:
+  // read image 2d depth
+  case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_f32:
+  // read image 2d array depth
+  case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_f32:
+    return true;
+  }
+
+  return false;
+}
+
+SDValue HSAILTargetLowering::LowerLdKernargIntrinsic(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  HSAILMachineFunctionInfo *FuncInfo = MF.getInfo<HSAILMachineFunctionInfo>();
+  HSAILParamManager &PM = FuncInfo->getParamManager();
+
+  EVT VT = Op.getValueType();
+  Type *Ty = Type::getIntNTy(*DAG.getContext(), VT.getSizeInBits());
+  SDValue Addr = Op.getOperand(1);
+  int64_t Offset = 0;
+  MVT PtrTy = getPointerTy(HSAILAS::KERNARG_ADDRESS);
+  AAMDNodes ArgMD; // FIXME: What is this for?
+  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+    Offset = CAddr->getSExtValue();
+    // Match a constant address argument to the parameter through the
+    // function's argument map (taking argument alignment into account).
+    // A match is not possible if we are accessing beyond the known kernel
+    // argument space, if we are accessing from a non-inlined function, or if
+    // there is an opaque argument with unknown size before the requested
+    // offset.
+    unsigned Param = UINT_MAX;
+    if (HSAIL::isKernelFunc(MF.getFunction()))
+      Param = PM.getParamByOffset(Offset);
+
+    if (Param != UINT_MAX) {
+      Addr = DAG.getTargetExternalSymbol(PM.getParamName(Param), PtrTy);
+      // Value *mdops[] = { const_cast(PM.getParamArg(param)) };
+      // ArgMD = MDNode::get(MF.getFunction()->getContext(), mdops);
+    } else {
+      Addr = SDValue();
+    }
+  }
+
+  SDValue Chain = DAG.getEntryNode();
+  return getArgLoad(DAG, SDLoc(Op), VT, Ty, false, HSAILAS::KERNARG_ADDRESS,
+                    Chain, Addr, SDValue(), 0, false, Offset);
+}
+
+SDValue HSAILTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  unsigned IntrID = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
+  SDLoc SL(Op);
+
+  // FIXME: This is for compatibility with old, custom HSAIL intrinsics. These
+  // should be removed once users are updated to use the LLVM intrinsics.
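+  // Most of these map one-to-one onto generic ISD or HSAIL target nodes,
+  // e.g. HSAIL_fma_f32(a, b, c) simply becomes (fma a, b, c).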
+ switch (IntrID) { + case HSAILIntrinsic::HSAIL_get_global_id: { + ConstantSDNode *Dim = dyn_cast(Op.getOperand(1)); + if (!Dim || Dim->getZExtValue() > 2) + return DAG.getUNDEF(Op.getValueType()); + return Op; + } + + case HSAILIntrinsic::HSAIL_abs_f32: + return DAG.getNode(ISD::FABS, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_abs_f64: + return DAG.getNode(ISD::FABS, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_rnd_f32: + return DAG.getNode(ISD::FRINT, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_rnd_f64: + return DAG.getNode(ISD::FRINT, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_floor_f32: + return DAG.getNode(ISD::FFLOOR, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_floor_f64: + return DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_ceil_f32: + return DAG.getNode(ISD::FCEIL, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_ceil_f64: + return DAG.getNode(ISD::FCEIL, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_trunc_f32: + return DAG.getNode(ISD::FTRUNC, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_trunc_f64: + return DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_fract_f32: + return DAG.getNode(HSAILISD::FRACT, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_fract_f64: + return DAG.getNode(HSAILISD::FRACT, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_copysign_f32: + return DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_copysign_f64: + return DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_f32: + return DAG.getNode(ISD::FMINNUM, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_f64: + return DAG.getNode(ISD::FMINNUM, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_f32: + return DAG.getNode(ISD::FMAXNUM, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_f64: + return DAG.getNode(ISD::FMAXNUM, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_fma_f32: + return DAG.getNode(ISD::FMA, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_fma_f64: + return DAG.getNode(ISD::FMA, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_popcount_u32_b32: + return DAG.getNode(ISD::CTPOP, SL, MVT::i32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_nfma_f32: + return DAG.getNode(HSAILISD::NFMA, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_nfma_f64: + return DAG.getNode(HSAILISD::NFMA, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_bitselect_u32: + return DAG.getNode(HSAILISD::BITSELECT, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_bitselect_u64: + return DAG.getNode(HSAILISD::BITSELECT, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_bfe: + return DAG.getNode(HSAILISD::UBITEXTRACT, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_ibfe: + return DAG.getNode(HSAILISD::SBITEXTRACT, SL, 
MVT::i32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + // FIXME: There should be LLVM intrinsics for mulhs / mulhu. + case HSAILIntrinsic::HSAIL_mulhi_s32: + return DAG.getNode(ISD::MULHS, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_mulhi_s64: + return DAG.getNode(ISD::MULHS, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_mulhi_u32: + return DAG.getNode(ISD::MULHU, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_mulhi_u64: + return DAG.getNode(ISD::MULHU, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_mad_u64: + return DAG.getNode(HSAILISD::UMAD, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_mad_u32: + return DAG.getNode(HSAILISD::UMAD, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_max_s32: + return DAG.getNode(HSAILISD::SMAX, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_u32: + return DAG.getNode(HSAILISD::UMAX, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_s32: + return DAG.getNode(HSAILISD::SMIN, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_u32: + return DAG.getNode(HSAILISD::UMIN, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_s64: + return DAG.getNode(HSAILISD::SMAX, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_u64: + return DAG.getNode(HSAILISD::UMAX, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_s64: + return DAG.getNode(HSAILISD::SMIN, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_u64: + return DAG.getNode(HSAILISD::UMIN, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_mul24_s32: + return DAG.getNode(HSAILISD::SMUL24, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_mul24_u32: + return DAG.getNode(HSAILISD::UMUL24, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_mad24_s32: + return DAG.getNode(HSAILISD::SMAD24, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_mad24_u32: + return DAG.getNode(HSAILISD::UMAD24, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_gcn_fldexp_f32: + return DAG.getNode(HSAILISD::FLDEXP, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_gcn_fldexp_f64: + return DAG.getNode(HSAILISD::FLDEXP, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_class_f32: + case HSAILIntrinsic::HSAIL_class_f64: { + // FIXME: The intrinsic should be i1 to begin with. 
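+    // Sign-extending the i1 class result gives 0 or all-ones in the i32 the
+    // legacy intrinsic returns.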
+ SDValue Class = DAG.getNode(HSAILISD::CLASS, SL, MVT::i1, Op.getOperand(1), + Op.getOperand(2)); + return DAG.getNode(ISD::SIGN_EXTEND, SL, MVT::i32, Class); + } + + case HSAILIntrinsic::HSAIL_segmentp_global: { + return DAG.getNode( + HSAILISD::SEGMENTP, SL, MVT::i1, + DAG.getTargetConstant(HSAILAS::GLOBAL_ADDRESS, SL, MVT::i32), + DAG.getTargetConstant(0, SL, MVT::i1), Op.getOperand(1)); + } + case HSAILIntrinsic::HSAIL_segmentp_local: { + return DAG.getNode( + HSAILISD::SEGMENTP, SL, MVT::i1, + DAG.getTargetConstant(HSAILAS::GROUP_ADDRESS, SL, MVT::i32), + DAG.getTargetConstant(0, SL, MVT::i1), Op.getOperand(1)); + } + case HSAILIntrinsic::HSAIL_segmentp_private: { + return DAG.getNode( + HSAILISD::SEGMENTP, SL, MVT::i1, + DAG.getTargetConstant(HSAILAS::PRIVATE_ADDRESS, SL, MVT::i32), + DAG.getTargetConstant(0, SL, MVT::i1), Op.getOperand(1)); + } + case HSAILIntrinsic::HSAIL_ld_kernarg_u32: + case HSAILIntrinsic::HSAIL_ld_kernarg_u64: { + return LowerLdKernargIntrinsic(Op, DAG); + } + default: + return Op; + } +} + +SDValue HSAILTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = cast(Op->getOperand(1))->getZExtValue(); + SDLoc SL(Op); + + if (isRdimage(IntNo)) + return lowerSamplerInitializerOperand(Op, DAG); + + switch (IntNo) { + case HSAILIntrinsic::HSAIL_activelanepermute_b32: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32), // width + Op.getOperand(2), // src0 + Op.getOperand(3), // src1 + Op.getOperand(4), // src2 + Op.getOperand(5) // src3 + }; + + return DAG.getNode(HSAILISD::ACTIVELANEPERMUTE, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanepermute_b64: { + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32), // width + Op.getOperand(2), // src0 + Op.getOperand(3), // src1 + Op.getOperand(4), // src2 + Op.getOperand(5) // src3 + }; + + return DAG.getNode(HSAILISD::ACTIVELANEPERMUTE, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanepermute_width_b32: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32), // width + Op.getOperand(2), // src0 + Op.getOperand(3), // src1 + Op.getOperand(4), // src2 + Op.getOperand(5) // src3 + }; + + return DAG.getNode(HSAILISD::ACTIVELANEPERMUTE, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanepermute_width_b64: { + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32), // width + Op.getOperand(2), // src0 + Op.getOperand(3), // src1 + Op.getOperand(4), // src2 + Op.getOperand(5) // src3 + }; + + return DAG.getNode(HSAILISD::ACTIVELANEPERMUTE, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelaneid_u32: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32) // width + }; + + return DAG.getNode(HSAILISD::ACTIVELANEID, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelaneid_width_u32: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32) // width + }; + + return 
DAG.getNode(HSAILISD::ACTIVELANEID, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanecount_u32_b1: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32), // width + Op.getOperand(2) + }; + + return DAG.getNode(HSAILISD::ACTIVELANECOUNT, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanecount_width_u32_b1: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32), // width + Op.getOperand(2) + }; + + return DAG.getNode(HSAILISD::ACTIVELANECOUNT, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanemask_v4_b64_b1: { + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32), // width + Op.getOperand(2) + }; + + return DAG.getNode(HSAILISD::ACTIVELANEMASK, SL, Op->getVTList(), Ops); + } + + case HSAILIntrinsic::HSAIL_activelanemask_v4_width_b64_b1: { + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32), // width + Op.getOperand(2) + }; + + return DAG.getNode(HSAILISD::ACTIVELANEMASK, SL, Op->getVTList(), Ops); + } + + default: + return Op; + } +} + +/// \brief Replace sampler initializer with sampler handle from +/// readonly segment, potentially creating a new handle. +SDValue +HSAILTargetLowering::lowerSamplerInitializerOperand(SDValue Op, + SelectionDAG &DAG) const { + const unsigned SAMPLER_ARG = 3; + SDValue sampler = Op.getOperand(SAMPLER_ARG); + + // The sampler operand is an initializer if it is constant and less than + // IMAGE_ARG_BIAS. + if (!isa(sampler)) + return Op; + + unsigned samplerConstant = cast(sampler)->getZExtValue(); + if (samplerConstant >= IMAGE_ARG_BIAS) + return Op; + + // This is a sampler initializer. + // Find or create sampler handle based on init val. + unsigned samplerHandleIndex = + Subtarget->getImageHandles()->findOrCreateSamplerHandle(samplerConstant); + + // Provided that this is simply int const we can assume it is not going to be + // changed, so we use readonly segment for the sampler. + // According to OpenCL spec samplers cannot be modified, so that is safe for + // OpenCL. If we are going to support modifiable or non-OpenCL samplers most + // likely the whole support code will need change. + Subtarget->getImageHandles()->getSamplerHandle(samplerHandleIndex)->setRO(); + + SDValue ops[16]; + for (unsigned i = 0; i < Op.getNumOperands(); i++) { + ops[i] = Op.getOperand(i); + } + + SDLoc SL(Op); + + // FIXME: Get correct address space pointer type. + SDValue Ops[] = { + DAG.getTargetConstant(samplerHandleIndex, SL, MVT::i32), + DAG.getRegister(HSAIL::NoRegister, getPointerTy()), + DAG.getTargetConstant(0, SL, MVT::i32), + DAG.getTargetConstant(BRIG_TYPE_SAMP, SL, MVT::i32), + DAG.getTargetConstant(BRIG_WIDTH_ALL, SL, MVT::i32), + DAG.getTargetConstant(1, SL, MVT::i1), // Const + DAG.getEntryNode() // Chain + }; + + EVT VT = sampler.getValueType(); + + // Don't use ptr32 since this is the readonly segment. 
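+  // The machine node is given an explicit MachineMemOperand below so later
+  // passes treat it as a load from the readonly segment at this handle's
+  // offset.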
+ MachineSDNode *LDSamp = + DAG.getMachineNode(HSAIL::LD_SAMP, SDLoc(Op), VT, MVT::Other, Ops); + + MachineFunction &MF = DAG.getMachineFunction(); + MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1); + unsigned size = VT.getStoreSize(); + Type *PTy = VT.getTypeForEVT(*DAG.getContext()); + PointerType *PT = PointerType::get(PTy, HSAILAS::READONLY_ADDRESS); + MachinePointerInfo MPtrInfo(UndefValue::get(PT), size * samplerHandleIndex); + MemOp[0] = + MF.getMachineMemOperand(MPtrInfo, MachineMemOperand::MOLoad, size, size); + LDSamp->setMemRefs(MemOp, MemOp + 1); + + ops[SAMPLER_ARG] = SDValue(LDSamp, 0); + + DAG.UpdateNodeOperands(Op.getNode(), makeArrayRef(ops, Op.getNumOperands())); + + return Op; +} + +SDValue HSAILTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl = SDLoc(Op); + EVT VT = Op.getValueType(); + + if (VT != MVT::i32) { + return Op; + } + + SDValue IntrID + = DAG.getTargetConstant(HSAILIntrinsic::HSAIL_bitalign_b32, dl, MVT::i32); + const SDValue src0 = Op.getOperand(0); + const SDValue src1 = Op.getOperand(1); + const ConstantSDNode *shift = dyn_cast(src1); + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, VT, + IntrID, + src0, src0, + shift ? DAG.getConstant(32 - (shift->getZExtValue() & 31), dl, MVT::i32) + : DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), src1)); +} + +SDValue HSAILTargetLowering::LowerROTR(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl = SDLoc(Op); + EVT VT = Op.getValueType(); + + if (VT != MVT::i32) { + return Op; + } + const SDValue src0 = Op.getOperand(0); + const SDValue src1 = Op.getOperand(1); + SDValue IntrID + = DAG.getTargetConstant(HSAILIntrinsic::HSAIL_bitalign_b32, dl, MVT::i32); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, IntrID, src0, src0, src1); +} + +SDValue HSAILTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl = SDLoc(Op); + EVT VT = Op.getValueType(); + + if (VT != MVT::i32) { + return Op; + } + const SDValue src = Op.getOperand(0); + const SDValue opr0 = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(HSAILIntrinsic::HSAIL_bytealign_b32, dl, MVT::i32), + src, src, DAG.getConstant(3, dl, MVT::i32)); + const SDValue opr1 = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(HSAILIntrinsic::HSAIL_bytealign_b32, dl, MVT::i32), + src, src, DAG.getConstant(1, dl, MVT::i32)); + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getTargetConstant(HSAILIntrinsic::HSAIL_bitselect_u32, dl, MVT::i32), + DAG.getConstant(0x00ff00ff, dl, VT), opr0, opr1); +} + +SDValue HSAILTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + + EVT VT = Op.getValueType(); + LoadSDNode *LD = cast(Op); + + if (VT.getSimpleVT() == MVT::i1) { + // Since there are no 1 bit load operations, the load operations are + // converted to 8 bit loads. + // First, do 8 bit load into 32 bits with sign extension, then + // truncate to 1 bit. + LoadSDNode *LD = cast(Op); + SDValue NewLD = + DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, LD->getChain(), + LD->getBasePtr(), MVT::i8, LD->getMemOperand()); + + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); + SDValue Ops[] = {Result, NewLD.getValue(1)}; + + return DAG.getMergeValues(Ops, dl); + } + + // Custom lowering for extload from sub-dword size to i64. We only + // do it because LLVM currently does not support Expand for EXTLOAD + // with illegal types. + // See "EXTLOAD should always be supported!" assert in LegalizeDAG.cpp. 
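+  // The 64-bit extending load is emitted as a 32-bit extending load whose
+  // result is then extended to i64.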
+ if (VT.getSimpleVT() != MVT::i64) + return Op; + ISD::LoadExtType extType = LD->getExtensionType(); + + if (extType == ISD::SEXTLOAD && LD->hasNUsesOfValue(1, 0)) { + // Check if the only use is a truncation to the size of loaded memory. + // In this case produce zext instead of sext. Note, that load chain + // has its own use. + SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); + for (; UI != UE; ++UI) { + if (UI.getUse().getResNo() == 0) { + // User of a loaded value. + if (UI->getOpcode() == ISD::AND && + isa(UI->getOperand(1))) { + EVT MemVT = LD->getMemoryVT(); + uint64_t Mask = UI->getConstantOperandVal(1); + if ((MemVT == MVT::i8 && Mask == 0xFFul) || + (MemVT == MVT::i16 && Mask == 0xFFFFul)) { + // The AND operator was not really needed. Produce zextload as it + // does + // not change the result and let AND node silintly die. + extType = ISD::ZEXTLOAD; + } + } + break; + } + } + } + + // Do extload into 32-bit register, then extend that. + SDValue NewLD = + DAG.getExtLoad(extType, dl, MVT::i32, LD->getChain(), LD->getBasePtr(), + MVT::i8, LD->getMemOperand()); + + SDValue Ops[] = { + DAG.getNode(ISD::getExtForLoadExtType(false, extType), dl, MVT::i64, NewLD), + NewLD.getValue(1) + }; + + // Replace chain in all uses. + // XXX: Do we really need to do this? + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), NewLD.getValue(1)); + + return DAG.getMergeValues(Ops, dl); +} + +SDValue HSAILTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + // Since there are no 1 bit store operations, the store operations are + // converted to 8 bit stores. + // First, sign extend to 32 bits, then use a truncating store to 8 bits. + + SDLoc dl = SDLoc(Op); + StoreSDNode *ST = cast(Op); + + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + SDValue Value = ST->getValue(); + MachineMemOperand *MMO = ST->getMemOperand(); + + assert(Value.getValueType() == MVT::i1 && + "Custom lowering only for i1 stores"); + + Value = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Value); + return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); +} + +static SDValue getMemFenceImpl(SDValue Chain, SDLoc SL, unsigned MemoryOrder, + unsigned GlobalMemoryScope, + unsigned GroupMemoryScope, + unsigned ImageMemoryScope, + SelectionDAG &CurDAG) { + const SDValue Ops[] = { + Chain, + CurDAG.getTargetConstant(HSAILIntrinsic::HSAIL_memfence, SL, MVT::i64), + CurDAG.getConstant(MemoryOrder, SL, MVT::i32), + CurDAG.getConstant(GlobalMemoryScope, SL, MVT::i32), + CurDAG.getConstant(GroupMemoryScope, SL, MVT::i32), + CurDAG.getConstant(ImageMemoryScope, SL, MVT::i32) + }; + + return CurDAG.getNode(ISD::INTRINSIC_VOID, SL, MVT::Other, Ops); +} + +static SDValue getMemFence(SDValue Chain, SDLoc SL, unsigned AS, + unsigned MemoryOrder, unsigned MemoryScope, + SelectionDAG &CurDAG) { + switch (AS) { + case HSAILAS::GLOBAL_ADDRESS: + return getMemFenceImpl(Chain, SL, MemoryOrder, MemoryScope, + BRIG_MEMORY_SCOPE_NONE, BRIG_MEMORY_SCOPE_NONE, + CurDAG); + + case HSAILAS::GROUP_ADDRESS: + return getMemFenceImpl(Chain, SL, MemoryOrder, MemoryScope, MemoryScope, + BRIG_MEMORY_SCOPE_NONE, CurDAG); + + case HSAILAS::FLAT_ADDRESS: + return getMemFenceImpl(Chain, SL, MemoryOrder, MemoryScope, + BRIG_MEMORY_SCOPE_WORKGROUP, BRIG_MEMORY_SCOPE_NONE, + CurDAG); + + default: + llvm_unreachable("unexpected memory segment"); + } +} + +SDValue HSAILTargetLowering::LowerATOMIC_LOAD(SDValue Op, + SelectionDAG &DAG) const { + // HSAIL doesnt support SequentiallyConsistent, + // lower an atomic load 
with SequentiallyConsistent memory order + // to a Release memfence and Acquire atomic load + AtomicSDNode *Node = cast(Op); + + if (Node->getOrdering() != SequentiallyConsistent) + return Op; + + unsigned brigMemoryOrder = BRIG_MEMORY_ORDER_SC_RELEASE; + unsigned brigMemoryScope = Node->getAddressSpace() == HSAILAS::GROUP_ADDRESS + ? BRIG_MEMORY_SCOPE_WORKGROUP + : BRIG_MEMORY_SCOPE_SYSTEM; + + SDLoc SL(Op); + + SDValue Chain = getMemFence(Op.getOperand(0), Op, Node->getAddressSpace(), + brigMemoryOrder, brigMemoryScope, DAG); + + return DAG.getAtomic(ISD::ATOMIC_LOAD, SL, Node->getMemoryVT(), + Op.getValueType(), Chain, Node->getBasePtr(), + Node->getMemOperand(), Acquire, Node->getSynchScope()); +} + +SDValue HSAILTargetLowering::LowerATOMIC_STORE(SDValue Op, + SelectionDAG &DAG) const { + // HSAIL doesnt support SequentiallyConsistent, + // lower an atomic store with SequentiallyConsistent memory order + // to Release atomic store and Acquire memfence + AtomicSDNode *Node = cast(Op); + + if (Node->getOrdering() != SequentiallyConsistent) + return Op; + + unsigned MemoryOrder = BRIG_MEMORY_ORDER_SC_ACQUIRE; + unsigned MemoryScope = Node->getAddressSpace() == HSAILAS::GROUP_ADDRESS + ? BRIG_MEMORY_SCOPE_WORKGROUP + : BRIG_MEMORY_SCOPE_SYSTEM; + + SDLoc SL(Op); + + SDValue ResNode = + DAG.getAtomic(ISD::ATOMIC_STORE, SL, Node->getMemoryVT(), + Node->getOperand(0), // Chain + Node->getBasePtr(), Node->getVal(), Node->getMemOperand(), + Release, Node->getSynchScope()); + return getMemFence(ResNode, Op, Node->getAddressSpace(), MemoryOrder, + MemoryScope, DAG); +} + +//===--------------------------------------------------------------------===// +bool HSAILTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + if (Subtarget->isGCN()) { + // Do not generate negative offsets as they can not be folded into + // instructions. + if (AM.BaseOffs < 0 || AM.Scale < 0) + return false; + } + + return TargetLowering::isLegalAddressingMode(AM, Ty); +} + +bool HSAILTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { + return false; +} + +bool HSAILTargetLowering::isZExtFree(EVT VT1, EVT VT2) const { return false; } + +bool HSAILTargetLowering::isFAbsFree(EVT VT) const { return true; } + +bool HSAILTargetLowering::isFNegFree(EVT VT) const { return true; } + +bool HSAILTargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { + // This is only profitable in HSAIL to go from a 64bit type to + // a 32bit type, but not to a 8 or 16bit type. + return (VT1 == EVT(MVT::i64) && VT2 == EVT(MVT::i32)) || + (VT1 == EVT(MVT::f64) && VT2 == EVT(MVT::f32)); +} + +bool HSAILTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + // HSAIL doesn't have any restrictions on this. + return true; +} + +MVT HSAILTargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { + // Shift amounts in registers must be in S registers + // Restrict shift amount to 32-bits. 
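+  // Shift amounts of other widths are zero-extended or truncated to this
+  // type when the DAG is built.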
+ return MVT::i32; +} + +bool HSAILTargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + return (SrcAS == HSAILAS::FLAT_ADDRESS && + DestAS == HSAILAS::GLOBAL_ADDRESS) || + (SrcAS == HSAILAS::GLOBAL_ADDRESS && DestAS == HSAILAS::FLAT_ADDRESS); +} + +void HSAILTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + const HSAILInstrInfo *TII = + static_cast(Subtarget->getInstrInfo()); + + if (TII->isInstAtomic(MI->getOpcode()) && !Node->hasAnyUseOfValue(0)) { + int NoRetAtomicOp = HSAIL::getAtomicNoRetOp(MI->getOpcode()); + if (NoRetAtomicOp != -1) { + MI->setDesc(TII->get(NoRetAtomicOp)); + MI->RemoveOperand(0); + } + + return; + } +} + +bool HSAILTargetLowering::isLoadBitCastBeneficial(EVT lVT, EVT bVT) const { + return !(lVT.getSizeInBits() == bVT.getSizeInBits() && + lVT.getScalarType().getSizeInBits() > + bVT.getScalarType().getSizeInBits() && + bVT.getScalarType().getSizeInBits() < 32 && + lVT.getScalarType().getSizeInBits() >= 32); +} + +bool HSAILTargetLowering::isVectorToScalarLoadStoreWidenBeneficial( + unsigned Width, EVT WidenVT, const MemSDNode *N) const { + unsigned WidenWidth = WidenVT.getSizeInBits(); + + // In HSAIL we have _v3 loads and stores, and in case of uneven vector size + // it is more effective to use one _v3 load instead of several _v1 loads + // For example for vector load of 3 integers: + // ld_v1_u64 + // ld_v1_u32 + // Is worse than: + // ld_v3_u32 + if ((Width * 4 / 3) == WidenWidth) + return false; + return true; +} Index: lib/Target/HSAIL/HSAILImages.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILImages.td @@ -0,0 +1,356 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// image operations + +let isImageInst=1 in { + +// Image read +class ReadImage1D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, $sampler, $coordWidth"), []>; + +class ReadImage2D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, $sampler, ( $coordWidth, $coordHeight )"), []>; + + +class ReadImage3D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordDepth), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, $sampler, ( $coordWidth, $coordHeight, $coordDepth )"), []>; + +class ReadImage2DDepth + : HSAILInst<(outs GPR32:$destR), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, + "( $destR ), " + "$image, $sampler, ( $coordWidth, $coordHeight )"), []>; + +class ReadImage2DArrayDepth + : HSAILInst<(outs GPR32:$destR), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordArrayIndex), + !strconcat(asm, + "( $destR ), " + "$image, $sampler, ( $coordWidth, $coordHeight, $coordArrayIndex )"), []>; + +// read image 1d +def RDIMAGE : HSAILInstImage_Ld_1D<"rdimage", BrigOpcode.RDIMAGE>; + + +def rd_imgf_1d_s32 : ReadImage1D<"rdimage_v4_1d_f32_roimg_s32">; +def rd_imgf_1d_f32 : ReadImage1D<"rdimage_v4_1d_f32_roimg_f32">; +def rd_imgi_1d_s32 : ReadImage1D<"rdimage_v4_1d_s32_roimg_s32">; +def rd_imgi_1d_f32 : ReadImage1D<"rdimage_v4_1d_s32_roimg_f32">; +def rd_imgui_1d_s32 : ReadImage1D<"rdimage_v4_1d_u32_roimg_s32">; +def rd_imgui_1d_f32 : ReadImage1D<"rdimage_v4_1d_u32_roimg_f32">; + +// read image 1da + +def rd_imgf_1da_s32 : ReadImage2D<"rdimage_v4_1da_f32_roimg_s32">; +def rd_imgf_1da_f32 : ReadImage2D<"rdimage_v4_1da_f32_roimg_f32">; +def rd_imgi_1da_s32 : ReadImage2D<"rdimage_v4_1da_s32_roimg_s32">; +def rd_imgi_1da_f32 : ReadImage2D<"rdimage_v4_1da_s32_roimg_f32">; +def rd_imgui_1da_s32 : ReadImage2D<"rdimage_v4_1da_u32_roimg_s32">; +def rd_imgui_1da_f32 : ReadImage2D<"rdimage_v4_1da_u32_roimg_f32">; + +// read image 2d + +def rd_imgf_2d_s32 : ReadImage2D<"rdimage_v4_2d_f32_roimg_s32">; +def rd_imgf_2d_f32 : ReadImage2D<"rdimage_v4_2d_f32_roimg_f32">; +def rd_imgi_2d_s32 : ReadImage2D<"rdimage_v4_2d_s32_roimg_s32">; +def rd_imgi_2d_f32 : ReadImage2D<"rdimage_v4_2d_s32_roimg_f32">; +def rd_imgui_2d_s32 : ReadImage2D<"rdimage_v4_2d_u32_roimg_s32">; +def rd_imgui_2d_f32 : ReadImage2D<"rdimage_v4_2d_u32_roimg_f32">; + + +// read image 2da + +def rd_imgf_2da_s32 : ReadImage3D<"rdimage_v4_2da_f32_roimg_s32">; +def rd_imgf_2da_f32 : ReadImage3D<"rdimage_v4_2da_f32_roimg_f32">; +def rd_imgi_2da_s32 : ReadImage3D<"rdimage_v4_2da_s32_roimg_s32">; +def rd_imgi_2da_f32 : ReadImage3D<"rdimage_v4_2da_s32_roimg_f32">; +def rd_imgui_2da_s32 : ReadImage3D<"rdimage_v4_2da_u32_roimg_s32">; +def rd_imgui_2da_f32 : ReadImage3D<"rdimage_v4_2da_u32_roimg_f32">; + +// read image 3d + +def rd_imgf_3d_s32 : ReadImage3D<"rdimage_v4_3d_f32_roimg_s32">; +def rd_imgf_3d_f32 : ReadImage3D<"rdimage_v4_3d_f32_roimg_f32">; +def 
rd_imgi_3d_s32 : ReadImage3D<"rdimage_v4_3d_s32_roimg_s32">; +def rd_imgi_3d_f32 : ReadImage3D<"rdimage_v4_3d_s32_roimg_f32">; +def rd_imgui_3d_s32 : ReadImage3D<"rdimage_v4_3d_u32_roimg_s32">; +def rd_imgui_3d_f32 : ReadImage3D<"rdimage_v4_3d_u32_roimg_f32">; + +// OpenCL 2.0 rd2ddepth + +def rd_imgf_2ddepth_s32 : ReadImage2DDepth<"rdimage_2ddepth_f32_roimg_s32">; +def rd_imgf_2ddepth_f32 : ReadImage2DDepth<"rdimage_2ddepth_f32_roimg_f32">; + +// OpenCL 2.0 rd2dadepth + +def rd_imgf_2dadepth_s32 : ReadImage2DArrayDepth<"rdimage_2dadepth_f32_roimg_s32">; +def rd_imgf_2dadepth_f32 : ReadImage2DArrayDepth<"rdimage_2dadepth_f32_roimg_f32">; + + +// Image loads +class LoadImage1D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR32:$coordWidth), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, $coordWidth"), []>; + +class LoadImage2D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, ( $coordWidth, $coordHeight )"), []>; + +class LoadImage3D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordDepth), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, ( $coordWidth, $coordHeight, $coordDepth )"), []>; + +class LoadImage2DDepth + : HSAILInst<(outs GPR32:$destR), + (ins GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, + "( $destR ), " + "$image, ( $coordWidth, $coordHeight )"), []>; + +class LoadImage2DArrayDepth + : HSAILInst<(outs GPR32:$destR), + (ins GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordArrayIndex), + !strconcat(asm, + "( $destR ), " + "$image, ( $coordWidth, $coordHeight, $coordArrayIndex )"), []>; + +// load image 1d + +def ld_imgf_1d_u32 : LoadImage1D<"ldimage_v4_1d_f32_rwimg_u32">; +def ld_imgi_1d_u32 : LoadImage1D<"ldimage_v4_1d_s32_rwimg_u32">; +def ld_imgui_1d_u32 : LoadImage1D<"ldimage_v4_1d_u32_rwimg_u32">; + +// load image 1d buffer + +def ld_imgf_1db_u32 : LoadImage1D<"ldimage_v4_1db_f32_rwimg_u32">; +def ld_imgi_1db_u32 : LoadImage1D<"ldimage_v4_1db_s32_rwimg_u32">; +def ld_imgui_1db_u32 : LoadImage1D<"ldimage_v4_1db_u32_rwimg_u32">; + +// load image 1d array + +def ld_imgf_1da_u32 : LoadImage2D<"ldimage_v4_1da_f32_rwimg_u32">; +def ld_imgi_1da_u32 : LoadImage2D<"ldimage_v4_1da_s32_rwimg_u32">; +def ld_imgui_1da_u32 : LoadImage2D<"ldimage_v4_1da_u32_rwimg_u32">; + +// load image 2d + +def ld_imgf_2d_u32 : LoadImage2D<"ldimage_v4_2d_f32_rwimg_u32">; +def ld_imgi_2d_u32 : LoadImage2D<"ldimage_v4_2d_s32_rwimg_u32">; +def ld_imgui_2d_u32 : LoadImage2D<"ldimage_v4_2d_u32_rwimg_u32">; + +// load image 2d array + +def ld_imgf_2da_u32 : LoadImage3D<"ldimage_v4_2da_f32_rwimg_u32">; +def ld_imgi_2da_u32 : LoadImage3D<"ldimage_v4_2da_s32_rwimg_u32">; +def ld_imgui_2da_u32 : LoadImage3D<"ldimage_v4_2da_u32_rwimg_u32">; + +// load image 3d + +def ld_imgf_3d_u32 : LoadImage3D<"ldimage_v4_3d_f32_rwimg_u32">; +def ld_imgi_3d_u32 : LoadImage3D<"ldimage_v4_3d_s32_rwimg_u32">; +def ld_imgui_3d_u32 : LoadImage3D<"ldimage_v4_3d_u32_rwimg_u32">; + +// load image 2ddepth +def ld_imgf_2ddepth_u32 : LoadImage2DDepth<"ldimage_2ddepth_f32_rwimg_u32">; + +// load image 2dadepth +def ld_imgf_2dadepth_u32 : LoadImage2DArrayDepth<"ldimage_2dadepth_f32_rwimg_u32">; + +// Image store +class StImage1d : HSAILInst<(outs), + 
(ins GPR32:$srcR, GPR32:$srcG, GPR32:$srcB, GPR32:$srcA, + GPR64:$image, GPR32:$coordWidth), + !strconcat(asm, "\t ( $srcR, $srcG, $srcB, $srcA ), $image, " + "$coordWidth"), + [(intr (ValTy GPR32:$srcR), (ValTy GPR32:$srcG), + (ValTy GPR32:$srcB), (ValTy GPR32:$srcA), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth))]>; + +class StImage2d : HSAILInst<(outs), + (ins GPR32:$srcR, GPR32:$srcG, GPR32:$srcB, GPR32:$srcA, + GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, "\t ( $srcR, $srcG, $srcB, $srcA ), $image, " + "( $coordWidth, $coordHeight )"), + [(intr (ValTy GPR32:$srcR), (ValTy GPR32:$srcG), + (ValTy GPR32:$srcB), (ValTy GPR32:$srcA), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth), (CoordTy GPR32:$coordHeight))]>; + +// FIXME: What is $src9? I don't see it in the spec. +class StImage3d : HSAILInst<(outs), + (ins GPR32:$srcR, GPR32:$srcG, GPR32:$srcB, GPR32:$srcA, + GPR64:$image, + GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordDepth, GPR32:$src9), + !strconcat(asm, "\t ( $srcR, $srcG, $srcB, $srcA ), $image, " + "( $coordWidth, $coordHeight, $coordDepth, $src9 )"), + [(intr (ValTy GPR32:$srcR), (ValTy GPR32:$srcG), + (ValTy GPR32:$srcB), (ValTy GPR32:$srcA), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth), (CoordTy GPR32:$coordHeight), + (CoordTy GPR32:$coordDepth), (CoordTy GPR32:$src9))]>; + +//image 2.0 2d depth +class StImage2dDepth : HSAILInst<(outs), + (ins GPR32:$srcR, + GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, "\t $srcR, $image, " + "( $coordWidth, $coordHeight )"), + [(intr (ValTy GPR32:$srcR), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth), (CoordTy GPR32:$coordHeight))]>; + +// FIXME: What is $src6? I don't see it in the spec. +//image 2.0 2d array depth +class StImage2dArrayDepth : HSAILInst<(outs), + (ins GPR32:$srcR, + GPR64:$image, + GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordArrayIndex, GPR32:$src6), + !strconcat(asm, "\t $srcR, $image, " + "( $coordWidth, $coordHeight, $coordArrayIndex, $src6 )"), + [(intr (ValTy GPR32:$srcR), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth), (CoordTy GPR32:$coordHeight), + (CoordTy GPR32:$coordArrayIndex), (CoordTy GPR32:$src6))]>; + +// store image 1d +def stimagef_1d_i32 : StImage1d; +def stimagei_1d_i32 : StImage1d; +def stimageui_1d_i32 : StImage1d; + +// store image 1d array +def stimagef_1da_i32 : StImage2d; +def stimagei_1da_i32 : StImage2d; +def stimageui_1da_i32 : StImage2d; + +// store image 1d buffer +def stimagef_1db_i32 : StImage1d; +def stimagei_1db_i32 : StImage1d; +def stimageui_1db_i32 : StImage1d; + +// store image 2d +def stimagef_2d_i32 : StImage2d; +def stimagei_2d_i32 : StImage2d; +def stimageui_2d_i32 : StImage2d; + +// store image 2d array +def stimagef_2da_i32 : StImage3d; +def stimagei_2da_i32 : StImage3d; +def stimageui_2da_i32 : StImage3d; + +// store image 3d +def stimagef_3d_i32 : StImage3d; +def stimagei_3d_i32 : StImage3d; +def stimageui_3d_i32 : StImage3d; + +// store image 2d depth +def stimagef_2ddepth_i32 : StImage2dDepth; + +// Store image 2d array depth +def stimagef_2dadepth_i32 : StImage2dArrayDepth; + +} // isImageInst end + +// Query image +class QueryImage : HSAILInst< + (outs GPR32:$dest), (ins GPR64:$image), + !strconcat(asm, "\t $dest, $image"), + [(set (i32 GPR32:$dest), (intr (i64 GPR64:$image)))]>; + +def query_width_1d : QueryImage; +def query_width_1db: QueryImage; +def query_width_1da: QueryImage; +def query_width_2d : QueryImage; +def query_width_2da: QueryImage; +def query_width_3d : QueryImage; 
+def query_width_2ddepth : QueryImage; +def query_width_2dadepth : QueryImage; + + +def query_height_2d : QueryImage; +def query_height_2da: QueryImage; +def query_height_3d : QueryImage; +def query_height_2ddepth : QueryImage; +def query_height_2dadepth : QueryImage; + +def query_depth : QueryImage; + +def query_format_1d : QueryImage; +def query_format_1db: QueryImage; +def query_format_1da: QueryImage; +def query_format_2d : QueryImage; +def query_format_2da: QueryImage; +def query_format_3d : QueryImage; + +def query_order_1d : QueryImage; +def query_order_1db: QueryImage; +def query_order_1da: QueryImage; +def query_order_2d : QueryImage; +def query_order_2da: QueryImage; +def query_order_3d : QueryImage; + +def query_array_1da: QueryImage; +def query_array_2da: QueryImage; +def query_array_2dadepth : QueryImage; + +def query_channelorder_2ddepth : QueryImage; +def query_channelorder_2dadepth : QueryImage; + +def query_channeltype_2ddepth : QueryImage; +def query_channeltype_2dadepth : QueryImage; + Index: lib/Target/HSAIL/HSAILInstrFormats.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILInstrFormats.td @@ -0,0 +1,1556 @@ +//===------ HSAILInstrFormats.td ---------------------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +// TODO HSA : fix so that a space is not emitted prior to end of +// statement semi-colon + +class HSAILInst pattern, + bits<16> BrigVal = 0, + bit appendSemicolon = 1> + : Instruction { + field bits<32> Inst; + let Namespace = "HSAIL"; + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = !if(appendSemicolon, !strconcat(asmstr, ";"), asmstr); + let Pattern = pattern; + + let UseNamedOperandTable = 1; + let Inst{15-0} = BrigVal; + + // Target specific flags + // Important! Keep in sync with HSAIL.h::HSAILTSFLAG + + // Instruction classes. See BrigKind. + bit InstAddr = 0; + bit InstAtomic = 0; + bit InstBasic = 0; + bit InstBr = 0; + bit InstCmp = 0; + bit InstCvt = 0; + bit InstImage = 0; + bit InstLane = 0; + bit InstMem = 0; + bit InstMemFence = 0; + bit InstMod = 0; + bit InstQueryImage = 0; + bit InstQuerySampler = 0; + bit InstQueue = 0; + bit InstSeg = 0; + bit InstSegCvt = 0; + bit InstSignal = 0; + bit InstSourceType = 0; + + // FIXME: Remove these + bit isConv = 0; + bit isImageInst = 0; + + bits<2> RoundAttr = 0; + + // Most instructions with a width modifier default to 1. 
+ bits<2> WidthAttr = WidthAttrValues.ONE; + + bit HasDefaultSegment = 0; + + let TSFlags{3} = InstAddr; + let TSFlags{4} = InstAtomic; + let TSFlags{5} = InstBasic; + let TSFlags{6} = InstBr; + let TSFlags{7} = InstCmp; + let TSFlags{8} = InstCvt; + let TSFlags{9} = InstImage; + let TSFlags{10} = InstLane; + let TSFlags{11} = InstMem; + let TSFlags{12} = InstMemFence; + let TSFlags{13} = InstMod; + let TSFlags{14} = InstQueryImage; + let TSFlags{15} = InstQuerySampler; + let TSFlags{16} = InstQueue; + let TSFlags{17} = InstSeg; + let TSFlags{18} = InstSegCvt; + let TSFlags{19} = InstSignal; + let TSFlags{20} = InstSourceType; + + let TSFlags{23} = isConv; + let TSFlags{24} = isImageInst; + + let TSFlags{26-25} = RoundAttr; + let TSFlags{28-27} = WidthAttr; + + let TSFlags{29} = HasDefaultSegment; + + // Store the opcode here because we want access to it in + // BRIGAsmPrinter, but can't really use the normal MC binary + // encoding method to do so. + let TSFlags{63-48} = BrigVal; +} + +//////////////////////////////////////////////////////////////////////////////// +// Basic instruction formats +//////////////////////////////////////////////////////////////////////////////// +// InstBasic +class HSAILInstBasic BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstBasic = 1; +} + +class HSAILInstBasic_0Op_NoRet BrigVal, HSAILProfile P> : + HSAILInstBasic { + let hasSideEffects = 1; +} + +class HSAILInstBasic_0Op BrigVal, HSAILProfile P> : + HSAILInstBasic; + +multiclass InstBasic_0Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstBasic_0Op; + def _U64 : HSAILInstBasic_0Op; +} + +class HSAILInstBasic_1Op BrigVal, HSAILProfile P> : + HSAILInstBasic; + +multiclass InstBasic_1Op_FPTypes BrigVal> { + def _F32 : HSAILInstBasic_1Op; + def _F64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_SignedIntTypes BrigVal> { + def _S32 : HSAILInstBasic_1Op; + def _S64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstBasic_1Op; + def _U64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_BitTypes BrigVal> { + def _B1 : HSAILInstBasic_1Op; + def _B32 : HSAILInstBasic_1Op; + def _B64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_SF BrigVal> { + def _F32 : HSAILInstBasic_1Op; + def _F64 : HSAILInstBasic_1Op; + + def _S32 : HSAILInstBasic_1Op; + def _S64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_BF BrigVal> { + def _F32 : HSAILInstBasic_1Op; + def _F64 : HSAILInstBasic_1Op; + + def _B1 : HSAILInstBasic_1Op; + def _B32 : HSAILInstBasic_1Op; + def _B64 : HSAILInstBasic_1Op; +} + +class HSAILInstBasic_2Op BrigVal, HSAILProfile P> : + HSAILInstBasic; + +multiclass InstBasic_2Op_FPTypes BrigVal> { + def _F32 : HSAILInstBasic_2Op; + def _F64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_SignedIntTypes BrigVal> { + def _S32 : HSAILInstBasic_2Op; + def _S64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstBasic_2Op; + def _U64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_BitTypes BrigVal> { + def _B1 : HSAILInstBasic_2Op; + def _B32 : HSAILInstBasic_2Op; + def _B64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_IntTypes BrigVal> { + def _S32 : HSAILInstBasic_2Op; + def _S64 : HSAILInstBasic_2Op; + def _U32 : HSAILInstBasic_2Op; + def _U64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_ShiftTypes BrigVal> { + def _S32 : HSAILInstBasic_2Op; + def _S64 : HSAILInstBasic_2Op; + def _U32 : HSAILInstBasic_2Op; + def _U64 
: HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_LdExp BrigVal> { + def _F32 : HSAILInstBasic_2Op; + def _F64 : HSAILInstBasic_2Op; +} + + +class HSAILInstBasic_3Op BrigVal, HSAILProfile P> : + HSAILInstBasic; + +multiclass InstBasic_3Op_FPTypes BrigVal> { + def _F32 : HSAILInstBasic_3Op; + def _F64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_3Op_IntTypes BrigVal> { + def _S32 : HSAILInstBasic_3Op; + def _S64 : HSAILInstBasic_3Op; + def _U32 : HSAILInstBasic_3Op; + def _U64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_3Op_BitTypes BrigVal> { + def _B1 : HSAILInstBasic_3Op; + def _B32 : HSAILInstBasic_3Op; + def _B64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_3Op_CMov BrigVal> { + def _B1 : HSAILInstBasic_3Op; + def _B32 : HSAILInstBasic_3Op; + def _B64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_3Op_BitExtract BrigVal> { + def _S32 : HSAILInstBasic_3Op; + def _S64 : HSAILInstBasic_3Op; + + def _U32 : HSAILInstBasic_3Op; + def _U64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_3Op_SUF BrigVal> { + def _S32 : HSAILInstBasic_3Op; + def _S64 : HSAILInstBasic_3Op; + + def _U32 : HSAILInstBasic_3Op; + def _U64 : HSAILInstBasic_3Op; + + def _F32 : HSAILInstBasic_3Op; + def _F64 : HSAILInstBasic_3Op; +} + + +// InstMod +class HSAILInstMod BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstMod = 1; +} + +// TODO: Needs pack operand? +class HSAILInstMod_1Op BrigVal, HSAILProfile P> : + HSAILInstMod; + +multiclass InstMod_1Op_FPTypes BrigVal> { + def _F32 : HSAILInstMod_1Op; + def _F64 : HSAILInstMod_1Op; +} + +multiclass InstMod_1Op_SignedIntTypes BrigVal> { + def _S32 : HSAILInstMod_1Op; + def _S64 : HSAILInstMod_1Op; +} + +multiclass InstMod_1Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstMod_1Op; + def _U64 : HSAILInstMod_1Op; +} + +multiclass InstMod_1Op_1BitTypes BrigVal> { + def _B32 : HSAILInstMod_1Op; + def _B64 : HSAILInstMod_1Op; +} + +multiclass InstMod_1Op_IntTypes BrigVal> { + def _S32 : HSAILInstMod_1Op; + def _S64 : HSAILInstMod_1Op; + def _U32 : HSAILInstMod_1Op; + def _U64 : HSAILInstMod_1Op; +} + +class HSAILInstMod_2Op BrigVal, HSAILProfile P> : + HSAILInstMod; + +multiclass InstMod_2Op_FPTypes BrigVal> { + def _F32 : HSAILInstMod_2Op; + def _F64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_SignedIntTypes BrigVal> { + def _S32 : HSAILInstMod_2Op; + def _S64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstMod_2Op; + def _U64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_BitTypes BrigVal> { + def _B32 : HSAILInstMod_2Op; + def _B64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_IntTypes BrigVal> { + def _S32 : HSAILInstMod_2Op; + def _S64 : HSAILInstMod_2Op; + def _U32 : HSAILInstMod_2Op; + def _U64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_SUF BrigVal> { + def _S32 : HSAILInstMod_2Op; + def _S64 : HSAILInstMod_2Op; + + def _U32 : HSAILInstMod_2Op; + def _U64 : HSAILInstMod_2Op; + + def _F32 : HSAILInstMod_2Op; + def _F64 : HSAILInstMod_2Op; +} + + +class HSAILInstMod_3Op BrigVal, HSAILProfile P> : + HSAILInstMod; + +multiclass InstMod_3Op_FPTypes BrigVal> { + def _F32 : HSAILInstMod_3Op; + def _F64 : HSAILInstMod_3Op; +} + + +// InstCvt +class InstCvt BrigVal, + RegisterClass DestRC, + ValueType SrcVT> : HSAILInst< + (outs DestRC:$dest), + (ins ftz:$ftz, + BrigRound:$round, + BrigType:$destTypedestLength, + BrigType:$sourceType, + getRegClassForVT.ret:$src), + opName#"$ftz$round$destTypedestLength$sourceType\t$dest, $src", + [], BrigVal 
+> { + let isConv = 1; + let InstCvt = 1; +} + +// This also generates the illegal type to same type cvts, which +// hopefully will never be used. +multiclass InstCvt_SrcTypes BrigVal, + RegisterClass DestRC> { + def _B1 : InstCvt; + + def _U32 : InstCvt; + def _U64 : InstCvt; + + def _S32 : InstCvt; + def _S64 : InstCvt; + + def _F16 : InstCvt; + def _F32 : InstCvt; + def _F64 : InstCvt; +} + +multiclass InstCvt_DestTypes BrigVal> { + defm _B1 : InstCvt_SrcTypes; + + defm _U32 : InstCvt_SrcTypes; + defm _U64 : InstCvt_SrcTypes; + + defm _S32 : InstCvt_SrcTypes; + defm _S64 : InstCvt_SrcTypes; + + defm _F16 : InstCvt_SrcTypes; + defm _F32 : InstCvt_SrcTypes; + defm _F64 : InstCvt_SrcTypes; +} + +// InstSourceType +class HSAILInstSourceType BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstSourceType = 1; +} + +class HSAILInstSourceType_1Op BrigVal, HSAILProfile P> : + HSAILInstSourceType; + +multiclass InstSourceType_1Op_U32_BitTypes BrigVal> { + def _U32_B32 : HSAILInstSourceType_1Op; + def _U32_B64 : HSAILInstSourceType_1Op; +} + +multiclass InstSourceType_1Op_U32_IntTypes BrigVal> { + def _U32_S32 : HSAILInstSourceType_1Op; + def _U32_S64 : HSAILInstSourceType_1Op; + + def _U32_U32 : HSAILInstSourceType_1Op; + def _U32_U64 : HSAILInstSourceType_1Op; +} + +class HSAILInstSourceType_2Op BrigVal, HSAILProfile P> : + HSAILInstSourceType; + +multiclass InstSourceType_2Op_Class_Types BrigVal> { + def _F32 : HSAILInstSourceType_2Op; + def _F64 : HSAILInstSourceType_2Op; +} + +class HSAILInstSourceType_3Op BrigVal, HSAILProfile P> : + HSAILInstSourceType; + +// Missing types that require q registers. +multiclass InstSourceType_3Op_Pack_Types BrigVal> { + def _U8X4_U32 : HSAILInstSourceType_3Op; + def _S8X4_S32 : HSAILInstSourceType_3Op; + + def _U8X8_U32 : HSAILInstSourceType_3Op; + def _S8X8_S32 : HSAILInstSourceType_3Op; + + def _U16X4_U32 : HSAILInstSourceType_3Op; + def _S16X4_S32 : HSAILInstSourceType_3Op; + + def _U32X2_U32 : HSAILInstSourceType_3Op; + def _S32X2_S32 : HSAILInstSourceType_3Op; + + def _F16X2_F16 : HSAILInstSourceType_3Op; + def _F16X4_F16 : HSAILInstSourceType_3Op; +} + +multiclass InstSourceType_3Op_Sad_Types BrigVal> { + def _U32_U32 : HSAILInstSourceType_3Op; + def _U32_U16X2 : HSAILInstSourceType_3Op; + def _U32_U8X4 : HSAILInstSourceType_3Op; +} + +class HSAILInstSourceType_4Op BrigVal, HSAILProfile P> : + HSAILInstSourceType; + + + +// InstLane +class HSAILInstLane BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 1; + let InstLane = 1; +} + +class HSAILInstLane_0Op BrigVal, HSAILProfile P> : + HSAILInstLane< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigWidth:$width, BrigType:$TypeLength, BrigType:$sourceType), + opName#"$width$TypeLength$sourceType\t$dest" +>; + +class HSAILInstLane_1Op BrigVal, HSAILProfile P> : + HSAILInstLane< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigWidth:$width, P.Src0RC:$src0, BrigType:$TypeLength, BrigType:$sourceType), + opName#"$width$TypeLength$sourceType\t$dest, $src0" +>; + +class HSAILInstLane_ActiveLanePermute BrigVal, HSAILProfile P> : + HSAILInstLane< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigWidth:$width, + P.Src0RC:$src0, P.Src1RC:$src1, P.Src2RC:$src2, P.Src3RC:$src3, + BrigType:$TypeLength, BrigType:$sourceType), + opName#"$width$TypeLength$sourceType\t$dest, $src0, $src1, $src2, $src3" +>; + +multiclass InstLane_ActiveLanePermute_Types BrigVal> { + def _B1 : HSAILInstLane_ActiveLanePermute; + def _B32 : HSAILInstLane_ActiveLanePermute; 
+ def _B64 : HSAILInstLane_ActiveLanePermute; +} + +class HSAILInstLane_ActiveLaneMask BrigVal> : + HSAILInstLane< + BrigVal, + // FIXME: Using compound operands as dest seems to not be selectable + //(outs Vec4Op:$dest), + (outs GPR64:$dest0, GPR64:$dest1, GPR64:$dest2, GPR64:$dest3), + (ins BrigWidth:$width, + B1Op:$src0, BrigType:$TypeLength, BrigType:$sourceType), +// opName#"$width$TypeLength$sourceType\t$dest, $src0" + opName#"_v4$width$TypeLength$sourceType\t($dest0, $dest1, $dest2, $dest3), $src0" +>; + +// InstBr +class HSAILInstBr BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 1; + let InstBr = 1; +} + +class HSAILInstBr_0Op_NoRet BrigVal> : + HSAILInstBr; + +class HSAILInstBr_1Op_NoRet BrigVal> : + HSAILInstBr; + +class HSAILInstBr_2Op_NoRet BrigVal> : + HSAILInstBr; + +// InstSeg +class HSAILInstSeg BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstSeg = 1; +} + +class HSAILInstSeg_0Op BrigVal, HSAILProfile P> : + HSAILInstSeg; + +multiclass InstSeg_0Op_PtrTypes BrigVal> { + def _U32 : HSAILInstSeg_0Op; + def _U64 : HSAILInstSeg_0Op; +} + +// InstSegCvt +class HSAILInstSegCvt BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstSegCvt = 1; +} + +class HSAILInstSegCvt_1Op BrigVal, HSAILProfile P> : + HSAILInstSegCvt; + +multiclass InstSegCvt_1Op_PtrTypes BrigVal> { + def _U32_U32 : HSAILInstSegCvt_1Op; + def _U32_U64 : HSAILInstSegCvt_1Op; + def _U64_U32 : HSAILInstSegCvt_1Op; + def _U64_U64 : HSAILInstSegCvt_1Op; +} + +multiclass InstSegCvt_1Op_Segmentp_Types BrigVal> { + def _B1_U32 : HSAILInstSegCvt_1Op; + def _B1_U64 : HSAILInstSegCvt_1Op; +} + +// InstMemFence +class HSAILInstMemFence BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 1; + let mayLoad = 1; + let mayStore = 1; + let isBarrier = 1; + let isNotDuplicable = 1; // XXX - This seems unnecessary + let InstMemFence = 1; +} + +class InstMemFence BrigVal> : + HSAILInstMemFence< + BrigVal, + (outs), + (ins BrigMemoryOrder:$order, + BrigMemoryScope:$scope), + opName#"$order$scope" +>; + +// InstMem +class HSAILInstMem BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let mayLoad = 1; + let mayStore = 1; + let InstMem = 1; +} + +class LD BrigVal, dag outs> : + HSAILInstMem { + let mayStore = 0; + let hasSideEffects = 0; +} + +class StoreOperands { + field dag op_ins = (ins srcOp:$src, MEMOP:$address, BrigType:$TypeLength, + BrigSegment:$segment, BrigAlignment:$align); +} + +def store_u32 : StoreOperands; +def store_f32 : StoreOperands; +def store_u64 : StoreOperands; +def store_f64 : StoreOperands; + +def store_v2_u32 : StoreOperands; +def store_v2_f32 : StoreOperands; + +def store_v2_u64 : StoreOperands; +def store_v2_f64 : StoreOperands; + +def store_v3_u32 : StoreOperands; +def store_v3_f32 : StoreOperands; + +def store_v3_u64 : StoreOperands; +def store_v3_f64 : StoreOperands; + +def store_v4_u32 : StoreOperands; +def store_v4_f32 : StoreOperands; +def store_v4_u64 : StoreOperands; +def store_v4_f64 : StoreOperands; + + +multiclass LD_Types BrigVal> { + def _S8 : LD; + def _U8 : LD; + + def _S16 : LD; + def _U16 : LD; + + def _S32 : LD, + LdStVectorMap; + def _U32 : LD, + LdStVectorMap; + def _F32 : LD, + LdStVectorMap; + + def _SAMP : LD, + LdStVectorMap; + + def _S64 : LD, + LdStVectorMap; + def _U64 : LD, + LdStVectorMap; + def _F64 : LD, + LdStVectorMap; + + def _V2_S32 : LD, + LdStVectorMap; + def _V2_U32 : LD, + LdStVectorMap; + def _V2_F32 
: LD, + LdStVectorMap; + + def _V2_S64 : LD, + LdStVectorMap; + def _V2_U64 : LD, + LdStVectorMap; + def _V2_F64 : LD, + LdStVectorMap; + + def _V3_S32 : LD, + LdStVectorMap; + def _V3_U32 : LD, + LdStVectorMap; + def _V3_F32 : LD, + LdStVectorMap; + + def _V3_S64 : LD, + LdStVectorMap; + def _V3_U64 : LD, + LdStVectorMap; + def _V3_F64 : LD, + LdStVectorMap; + + def _V4_S32 : LD, + LdStVectorMap; + def _V4_U32 : LD, + LdStVectorMap; + def _V4_F32 : LD, + LdStVectorMap; + + def _V4_S64 : LD, + LdStVectorMap; + def _V4_U64 : LD, + LdStVectorMap; + def _V4_F64 : LD, + LdStVectorMap; +} + +class ST BrigVal, dag ins> : + HSAILInstMem { + let mayLoad = 0; + let hasSideEffects = 0; +} + +multiclass ST_Types BrigVal> { + def _U8 : ST; + def _U16 : ST; + + def _U32 : ST; + def _F32 : ST; + + def _U64 : ST; + def _F64 : ST; + + def _V2_U32 : ST; + def _V2_F32 : ST; + def _V2_U64 : ST; + def _V2_F64 : ST; + + def _V3_U32 : ST; + def _V3_F32 : ST; + def _V3_U64 : ST; + def _V3_F64 : ST; + + def _V4_U32 : ST; + def _V4_F32 : ST; + def _V4_U64 : ST; + def _V4_F64 : ST; +} + +// InstAtomic +class HSAILInstAtomic BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 1; + let mayLoad = 1; + let mayStore = 1; + let InstAtomic = 1; + + int atomicOperation = ?; +} + +class InstAtomic_0Op BrigVal, + int opVal, HSAILProfile P, string noRetOp = ""> : + HSAILInstAtomic< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$dest, $address" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +class InstAtomic_1Op BrigVal, + int opVal, HSAILProfile P, string noRetOp = ""> : + HSAILInstAtomic< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + P.Src0RC:$src0, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$dest, $address, $src0" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +class InstAtomic_2Op BrigVal, + int opVal, HSAILProfile P, string noRetOp = ""> : + HSAILInstAtomic< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + P.Src0RC:$src0, + P.Src1RC:$src1, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$dest, $address, $src0, $src1" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +multiclass InstAtomic_0Op_BitTypes BrigVal, + int opVal, string noRetOp = ""> { + def _B32 : InstAtomic_0Op; + def _B64 : InstAtomic_0Op; +} + +multiclass InstAtomic_1Op_BitTypes BrigVal, + int opVal, string noRetOp = ""> { + def _B32 : InstAtomic_1Op; + def _B64 : InstAtomic_1Op; +} + +multiclass InstAtomic_2Op_BitTypes BrigVal, int opVal, string noRetOp = ""> { + def _B32 : InstAtomic_2Op; + def _B64 : InstAtomic_2Op; +} + +multiclass InstAtomic_1Op_IntTypes BrigVal, int opVal, string noRetOp = ""> { + def _S32 : InstAtomic_1Op; + def _S64 : InstAtomic_1Op; + + def _U32 : InstAtomic_1Op; + def _U64 : InstAtomic_1Op; +} + +class InstAtomic_0Op_NoRet BrigVal, int opVal, + HSAILProfile P, string typeSuffix> : + HSAILInstAtomic< + BrigVal, + (outs), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + 
BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$address" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +class InstAtomic_1Op_NoRet BrigVal, + int opVal, HSAILProfile P, + string typeSuffix> : + HSAILInstAtomic< + BrigVal, + (outs), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + P.Src0RC:$src0, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$address, $src0" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +class InstAtomic_2Op_NoRet BrigVal, int opVal, + HSAILProfile P, string typeSuffix> : + HSAILInstAtomic< + BrigVal, + (outs), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + P.Src0RC:$src0, + P.Src1RC:$src1, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$address, $src0, $src1" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +multiclass InstAtomic_1Op_NoRet_BitTypes BrigVal, int opVal> { + def _B32 : InstAtomic_1Op_NoRet; + def _B64 : InstAtomic_1Op_NoRet; +} + +multiclass InstAtomic_1Op_NoRet_IntTypes BrigVal, int opVal> { + def _S32 : InstAtomic_1Op_NoRet; + def _S64 : InstAtomic_1Op_NoRet; + + def _U32 : InstAtomic_1Op_NoRet; + def _U64 : InstAtomic_1Op_NoRet; +} + +// InstCmp +class HSAILInstCmp BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstCmp = 1; + let isCompare = 1; +} + +class InstCmp BrigVal, HSAILProfile P> : + HSAILInstCmp; + +multiclass InstCmp_CmpTypes BrigVal, BRIGType destTy> { + def _B1 : InstCmp("Inst"#destTy.InstName#"_B1_B1")>; + def _S32 : InstCmp("Inst"#destTy.InstName#"_S32_S32")>; + def _S64 : InstCmp("Inst"#destTy.InstName#"_S64_S64")>; + def _U32 : InstCmp("Inst"#destTy.InstName#"_U32_U32")>; + def _U64 : InstCmp("Inst"#destTy.InstName#"_U64_U64")>; + def _F32 : InstCmp("Inst"#destTy.InstName#"_F32_F32")>; + def _F64 : InstCmp("Inst"#destTy.InstName#"_F64_F64")>; +} + +// TODO: Support for other return types. 
+multiclass InstCmp_RetTypes BrigVal> { + defm _B1 : InstCmp_CmpTypes; + // defm _S32 : InstCmp_CmpTypes; + // defm _U32 : InstCmp_CmpTypes; + // defm _S64 : InstCmp_CmpTypes; + // defm _U64 : InstCmp_CmpTypes; + // defm _F32 : InstCmp_CmpTypes; + // defm _F64 : InstCmp_CmpTypes; +} + + +// InstAddr +class HSAILInstAddr BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstAddr = 1; +} + +class HSAILInstAddr_1Op BrigVal, HSAILProfile P> : + HSAILInstAddr; + +multiclass InstAddr_1Op_PtrTypes BrigVal> { + def _U32 : HSAILInstAddr_1Op; + def _U64 : HSAILInstAddr_1Op; +} + +// InstImage +class HSAILInstImage BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let mayLoad = 1; + let mayStore = 1; + let InstImage = 1; +} + +class HSAILInstImage_Ld_1D BrigVal> : + HSAILInstImage { + let mayStore = 0; +} + +//////////////////////////////////////////////////////////////////////////////// +// Basic instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstBasic_0Op_Pat : Pat < + (vt (node)), + (inst bt) +>; + +class InstBasic_0Op_NoRet_Pat : Pat < + (node), + (inst BrigType.NONE) +>; + +class InstBasic_1Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0))), + (inst $src0, bt) +>; + +multiclass InstBasic_1Op_IntTypes_Pat { + def : InstBasic_1Op_Pat(inst#!if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstBasic_1Op_Pat(inst#!if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +multiclass InstBasic_1Op_BitTypes_Pat { + def : InstBasic_1Op_Pat(inst#"_B32"), node, i32, + BrigType.B32 + >; + + def : InstBasic_1Op_Pat(inst#"_B64"), node, i64, + BrigType.B64 + >; + + def : InstBasic_1Op_Pat(inst#"_B1"), node, i1, + BrigType.B1 + >; +} + +multiclass InstBasic_1Op_FPTypes_Pat { + def : InstBasic_1Op_Pat(inst#"_F32"), node, f32, BrigType.F32>; + def : InstBasic_1Op_Pat(inst#"_F64"), node, f64, BrigType.F64>; +} + +class InstBasic_2Op_Pat : Pat < + (vt0 (node (GPROrImm vt0:$src0), (vt1 (GPROrImm vt1:$src1)))), + (inst $src0, $src1, bt) +>; + +multiclass InstBasic_2Op_IntTypes_Pat { + def : InstBasic_2Op_Pat(inst#!if(signed, "_S32", "_U32")), node, i32, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstBasic_2Op_Pat(inst#!if(signed, "_S64", "_U64")), node, i64, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +multiclass InstBasic_2Op_BitIntTypes_Pat { + def : InstBasic_2Op_Pat(inst#"_B32"), node, i32, i32, + BrigType.B32 + >; + + def : InstBasic_2Op_Pat(inst#"_B64"), node, i64, i64, + BrigType.B64 + >; + + def : InstBasic_2Op_Pat(inst#"_B1"), node, i1, i1, + BrigType.B1 + >; +} + +multiclass InstBasic_2Op_FPTypes_Pat { + def : InstBasic_2Op_Pat; + def : InstBasic_2Op_Pat; +} + +class InstBasic_3Op_Pat : Pat < + (node (vt (GPROrImm vt:$src0)), (vt (GPROrImm vt:$src1)), (vt (GPROrImm vt:$src2))), + (inst $src0, $src1, $src2, bt) +>; +multiclass InstBasic_3Op_IntTypes_Pat { + def : InstBasic_3Op_Pat(inst# !if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstBasic_3Op_Pat(inst# !if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +// XXX - b1 missing intentionally with current set of instructions. 
+multiclass InstBasic_3Op_BitTypes_Pat { + def : InstBasic_3Op_Pat(inst#"_B32"), node, i32, BrigType.B32>; + def : InstBasic_3Op_Pat(inst#"_B64"), node, i64, BrigType.B64>; +} + +multiclass InstBasic_3Op_FPTypes_Pat { + def : InstBasic_3Op_Pat(inst#"_F32"), node, f32, BrigType.F32>; + def : InstBasic_3Op_Pat(inst#"_F64"), node, f64, BrigType.F64>; +} + +class InstBasic_CMov_Pat : Pat < + (select i1:$src0, (GPROrImm vt:$src1), (GPROrImm vt:$src2)), + (vt (inst $src0, $src1, $src2, bt)) +>; + +class InstBasic_3Op_BitExtract_Pat : Pat < + (node (vt (GPROrImm vt:$src0)), (i32 (GPROrImm i32:$src1)), (i32 (GPROrImm i32:$src2))), + (inst $src0, $src1, $src2, bt) +>; + +multiclass InstBasic_3Op_BitExtract_IntTypes_Pat { + def : InstBasic_3Op_BitExtract_Pat(inst# !if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstBasic_3Op_BitExtract_Pat(inst# !if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; + +} + +//////////////////////////////////////////////////////////////////////////////// +// Mod instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstMod_1Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0))), + (inst ftz, round, $src0, bt) +>; + +class InstMod_2Op_Pat : Pat < + (vt (node (vt (GPROrImm vt:$src0)), (vt (GPROrImm vt:$src1)))), + (inst ftz, round, $src0, $src1, bt) +>; + +class InstMod_3Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0), (GPROrImm vt:$src1), (GPROrImm vt:$src2))), + (inst ftz, round, $src0, $src1, $src2, bt) +>; + +multiclass InstMod_1Op_FPTypes_Pat { + def : InstMod_1Op_Pat(inst#"_F32"), node, f32, BrigType.F32, round, 1>; + def : InstMod_1Op_Pat(inst#"_F64"), node, f64, BrigType.F64, round, 0>; +} + +multiclass InstMod_2Op_IntTypes_Pat { + def : InstMod_2Op_Pat(inst# !if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstMod_2Op_Pat(inst# !if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +multiclass InstMod_2Op_FPTypes_Pat { + def : InstMod_2Op_Pat(inst#"_F32"), node, f32, BrigType.F32, round, 1>; + def : InstMod_2Op_Pat(inst#"_F64"), node, f64, BrigType.F64, round, 0>; +} + +multiclass InstMod_3Op_FPTypes_Pat { + def : InstMod_3Op_Pat(inst#"_F32"), node, f32, BrigType.F32, BrigRound.FLOAT_DEFAULT, 1>; + def : InstMod_3Op_Pat(inst#"_F64"), node, f64, BrigType.F64, BrigRound.FLOAT_DEFAULT, 0>; +} + +//////////////////////////////////////////////////////////////////////////////// +// SourceType instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstSourceType_1Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0))), + (inst $src0, destbt, srcbt) +>; + +class InstSourceType_2Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0), (GPROrImm vt:$src1))), + (inst $src0, $src1, destbt, srcbt) +>; + +class InstSourceType_Class_Pat : Pat < + (i1 (node (vt (GPROrImm vt:$src0)), (i32 (GPROrImm i32:$src1)))), + (inst $src0, $src1, BrigType.B1, srcbt) +>; + +class InstSourceType_3Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0), (GPROrImm vt:$src1), (GPROrImm vt:$src2))), + (inst $src0, $src1, $src2, destbt, srcbt) +>; + +class InstSourceType_4Op_Pat : Pat < + (node (GPROrImm vt:$src0), + (GPROrImm vt:$src1), + (GPROrImm vt:$src2), + (GPROrImm vt:$src3)), + (inst $src0, $src1, $src2, $src3, destbt, srcbt) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Lane instruction 
patterns +//////////////////////////////////////////////////////////////////////////////// + +class ActiveLanePermutePat : Pat< + (vt (node (i32 timm:$width), + (vt (GPROrImm vt:$src0)), + (i32 (GPROrImm i32:$src1)), + (vt (GPROrImm vt:$src2)), + (i1 (GPROrImm i1:$src3)))), + (inst (i32 $width), $src0, $src1, $src2, $src3, bt, BrigType.NONE) +>; + +class ActiveLaneIdPat : Pat< + (vt (node (i32 timm:$width))), + (inst $width, bt, BrigType.NONE) +>; + +class ActiveLaneCountPat : Pat< + (vt (node (i32 timm:$width), (i1 (GPROrImm i1:$src0)))), + (inst $width, $src0, bt, BrigType.B1) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Br instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstBr_0Op_NoRet_Pat : Pat < + (node), + (inst width, BrigType.NONE) +>; + +class InstBr_0Op_Pat : Pat < + (vt (node)), + (inst width, bt) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Seg instruction patterns +//////////////////////////////////////////////////////////////////////////////// + + +class InstSeg_0Op_Pat : Pat < + (vt (node)), + (inst segment, bt) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Addr instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstAddr_1Op_Pat : Pat < + (node ADDR:$address), + (inst segment, MEMOP:$address, bt) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Atomic instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class AtomicPat_0Op_Pat : Pat< + (node (AtomicAddr AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address)), + (vt (inst inst.atomicOperation, + $segment, + $order, + $scope, + $equiv, + MEMOP:$address, + bt)) +>; + +class AtomicPat_1Op_Pat : Pat< + (node (AtomicAddr AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address), + (GPROrImm vt:$src0)), + (vt (inst inst.atomicOperation, + $segment, + $order, + $scope, + $equiv, + MEMOP:$address, + $src0, + bt)) +>; + +class AtomicPat_2Op_Pat : Pat< + (node (AtomicAddr AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address), + (GPROrImm vt:$src0), + (GPROrImm vt:$src1)), + (vt (inst inst.atomicOperation, + $segment, + $order, + $scope, + $equiv, + MEMOP:$address, + vt:$src0, + vt:$src1, + bt)) +>; + +class AtomicPat_1Op_NoRet_Pat : Pat< + (node (AtomicAddr AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address), + (vt (GPROrImm vt:$src0))), + (inst inst.atomicOperation, + $segment, + $order, + $scope, + $equiv, + MEMOP:$address, + vt:$src0, + bt) +>; + +multiclass AtomicPat_0Op_BitTypes { + def : AtomicPat_0Op_Pat< + !cast(inst#"_B32"), node, i32, BrigType.B32 + >; + + def : AtomicPat_0Op_Pat< + !cast(inst#"_B64"), node, i64, BrigType.B64 + >; +} + +multiclass AtomicPat_1Op_IntTypes { + def : AtomicPat_1Op_Pat< + !cast(inst#!if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : AtomicPat_1Op_Pat< + !cast(inst#!if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +multiclass AtomicPat_1Op_BitTypes { + def : AtomicPat_1Op_Pat< + !cast(inst#"_B32"), node, i32, BrigType.B32 + >; + + def : 
AtomicPat_1Op_Pat< + !cast(inst#"_B64"), node, i64, BrigType.B64 + >; +} + +multiclass AtomicPat_1Op_NoRet_BitTypes { + def : AtomicPat_1Op_NoRet_Pat< + !cast(inst#"_B32"), node, i32, BrigType.B32 + >; + + def : AtomicPat_1Op_NoRet_Pat< + !cast(inst#"_B64"), node, i64, BrigType.B64 + >; +} + +multiclass AtomicPat_2Op_BitTypes { + def : AtomicPat_2Op_Pat< + !cast(inst#"_B32"), node, i32, BrigType.B32 + >; + + def : AtomicPat_2Op_Pat< + !cast(inst#"_B64"), node, i64, BrigType.B64 + >; +} + +def target : Operand {} Index: lib/Target/HSAIL/HSAILInstrInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILInstrInfo.h @@ -0,0 +1,200 @@ +//===- HSAILInstrInfo.h - HSAIL Instruction Information --------*- C++ -*- ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the HSAIL implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_INSTRUCTION_INFO_H_ +#define _HSAIL_INSTRUCTION_INFO_H_ + +#include "HSAIL.h" +#include "HSAILRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "HSAILGenInstrInfo.inc" + +namespace llvm { +class HSAILSubtarget; +class RegScavenger; + +class HSAILInstrInfo : public HSAILGenInstrInfo { + const HSAILRegisterInfo RI; + +public: + explicit HSAILInstrInfo(HSAILSubtarget &st); + + ~HSAILInstrInfo(); + + const HSAILRegisterInfo &getRegisterInfo() const { return RI; } + + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, + unsigned &DstReg, unsigned &SubIdx) const override; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const override; + + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const override; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify = false) const override; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const override; + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, unsigned SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, unsigned DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + +public: + bool areLoadsFromSameBasePtr(SDNode *Node1, SDNode *Node2, int64_t &Offset1, + int64_t &Offset2) const override; + + bool shouldScheduleLoadsNear(SDNode *Node1, SDNode *Node2, int64_t Offset1, + int64_t Offset2, + unsigned NumLoads) const override; + + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; + + bool 
isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override; + + RegScavenger *getRS() const { return RS; } + + bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override; + + const TargetRegisterClass *getOpRegClass(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + unsigned OpNo) const; + + bool verifyInstruction(const MachineInstr *MI, + StringRef &ErrInfo) const override; + + /// \brief Returns the operand named \p Op. If \p MI does not have an + /// operand named \c Op, this function returns nullptr. + MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const; + + const MachineOperand *getNamedOperand(const MachineInstr &MI, + unsigned OpName) const { + return getNamedOperand(const_cast(MI), OpName); + } + + int64_t getNamedModifierOperand(const MachineInstr &MI, + unsigned OpName) const { + return getNamedOperand(MI, OpName)->getImm(); + } + + int64_t getNamedModifierOperand(MachineInstr &MI, unsigned OpName) const { + return getNamedOperand(MI, OpName)->getImm(); + } + + bool isInstBasic(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstBasic; + } + + bool isInstMod(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstMod; + } + + bool isInstSourceType(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstSourceType; + } + + bool isInstLane(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstLane; + } + + bool isInstBr(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstBr; + } + + bool isInstSeg(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstSeg; + } + + bool isInstSegCvt(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstSegCvt; + } + + bool isInstMemFence(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstMemFence; + } + + bool isInstCmp(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstCmp; + } + + bool isInstMem(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstMem; + } + + bool isInstAtomic(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstAtomic; + } + + bool isInstImage(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstImage; + } + + bool isInstCvt(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstCvt; + } + + bool isInstAddr(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstAddr; + } + + uint16_t getBrigOpcode(uint16_t Opcode) const { + return (get(Opcode).TSFlags & HSAILInstrFlags::InstBrigOpcode) >> + Log2_64(HSAILInstrFlags::InstBrigOpcodeLo); + } + +private: + RegScavenger *RS; +}; + +namespace HSAIL { +int getAtomicRetOp(uint16_t Opcode); +int getAtomicNoRetOp(uint16_t Opcode); + +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); + +// FIXME: This is a thin wrapper around the similarly named and generated +// getLdStVectorOpcode, which we should use directly. 
+int getVectorLdStOpcode(uint16_t Opcode, unsigned vsize);
+}
+
+} // End llvm namespace
+
+#endif // _HSAIL_INSTRUCTION_INFO_H_
Index: lib/Target/HSAIL/HSAILInstrInfo.cpp
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/HSAILInstrInfo.cpp
@@ -0,0 +1,942 @@
+//===-- HSAILInstrInfo.cpp - HSAIL Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HSAILInstrInfo.h"
+#include "HSAILBrigDefs.h"
+#include "HSAILTargetMachine.h"
+#include "HSAILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/CommandLine.h"
+
+#include <queue>
+
+#define GET_INSTRINFO_CTOR_DTOR
+#define GET_INSTRMAP_INFO
+#include "HSAILGenInstrInfo.inc"
+using namespace llvm;
+namespace llvm {
+
+static cl::opt<bool> DisableBranchAnalysis("disable-branch-analysis",
+                                           cl::Hidden,
+                                           cl::desc("Disable branch analysis"));
+static cl::opt<bool>
+    DisableCondReversion("disable-branch-cond-reversion", cl::Hidden,
+                         cl::desc("Disable branch condition reversion"));
+
+// Reverse conditions in branch analysis.
+// It marks whether or not we need to reverse the condition
+// when we insert a new branch.
+enum CondReverseFlag {
+  COND_IRREVERSIBLE,     // For branches that cannot be reversed
+  COND_REVERSE_POSITIVE, // Doesn't need inversion
+  COND_REVERSE_NEGATIVE, // Needs inversion
+  COND_REVERSE_DEPENDANT // Indicates that this condition has exactly
+                         // one dependency which should be reversed with it
+};
+
+HSAILInstrInfo::HSAILInstrInfo(HSAILSubtarget &st)
+    : HSAILGenInstrInfo(),
+      // : TargetInstrInfoImpl(HSAILInsts, array_lengthof(HSAILInsts)),
+      RI(st) {
+  RS = new RegScavenger();
+}
+
+HSAILInstrInfo::~HSAILInstrInfo() { delete RS; }
+
+bool HSAILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+                                           unsigned &SrcReg, unsigned &DstReg,
+                                           unsigned &SubIdx) const {
+  // HSAIL does not have any registers that overlap and cause
+  // an extension.
+ return false; +} + +unsigned HSAILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + const MCInstrDesc &MCID = get(MI->getOpcode()); + if (!MCID.mayLoad() || !MI->hasOneMemOperand()) + return HSAIL::NoRegister; + + const MachineOperand *Segment = getNamedOperand(*MI, HSAIL::OpName::segment); + if (!Segment || Segment->getImm() != HSAILAS::SPILL_ADDRESS) + return HSAIL::NoRegister; + + int AddressIdx = + HSAIL::getNamedOperandIdx(MI->getOpcode(), HSAIL::OpName::address); + const MachineOperand &Base = MI->getOperand(AddressIdx + HSAILADDRESS::BASE); + + if (Base.isFI()) { + FrameIndex = Base.getIndex(); + return MI->getOperand(0).getReg(); + } + + return HSAIL::NoRegister; +} + +unsigned HSAILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + return isLoadFromStackSlot(MI, FrameIndex); +} + +unsigned HSAILInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + const MCInstrDesc &MCID = get(MI->getOpcode()); + if (!MCID.mayStore() || !MI->hasOneMemOperand()) + return 0; + + const MachineOperand *Segment = getNamedOperand(*MI, HSAIL::OpName::segment); + if (!Segment || Segment->getImm() != HSAILAS::SPILL_ADDRESS) + return HSAIL::NoRegister; + + int AddressIdx = + HSAIL::getNamedOperandIdx(MI->getOpcode(), HSAIL::OpName::address); + const MachineOperand &Base = MI->getOperand(AddressIdx + HSAILADDRESS::BASE); + if (Base.isFI()) { + FrameIndex = Base.getIndex(); + return MI->getOperand(0).getReg(); + } + + return HSAIL::NoRegister; +} + +unsigned HSAILInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + return isStoreToStackSlot(MI, FrameIndex); +} + +static bool IsDefBeforeUse(MachineBasicBlock &MBB, unsigned Reg, + const MachineRegisterInfo &MRI, bool &CanReverse) { + // TODO_HSA: With LiveVariable analysis we can make it + // a lot more effectively. 
+  // But currently we cannot rely on any of the analysis results.
+  // Newer LLVM has the MRI::tracksLiveness flag;
+  // if it is true we don't need this costly BFS search.
+
+  CanReverse = true;
+
+  if (MRI.hasOneUse(Reg))
+    return true;
+
+  std::queue<MachineBasicBlock *> Q;
+  SmallPtrSet Visited;
+
+  Q.push(&MBB);
+
+  while (!Q.empty()) {
+    MachineBasicBlock *cur_mbb = Q.front();
+    Q.pop();
+
+    for (MachineBasicBlock::succ_iterator succ = cur_mbb->succ_begin(),
+                                          succ_end = cur_mbb->succ_end();
+         succ != succ_end; ++succ)
+      if (!Visited.count(*succ)) {
+        Visited.insert(*succ);
+
+        bool need_process_further = true;
+
+        // Process basic block
+        for (MachineBasicBlock::iterator instr = (*succ)->begin(),
+                                         instr_end = (*succ)->end();
+             instr != instr_end; ++instr) {
+          if (instr->readsRegister(Reg)) {
+            // Always abort on circular dependencies,
+            // which would require inserting or removing a not.
+            if (instr->getParent() == &MBB &&
+                (instr->isBranch() || (instr->getOpcode() == HSAIL::NOT_B1))) {
+              CanReverse = false;
+            }
+
+            return false;
+          }
+          if (instr->definesRegister(Reg)) {
+            need_process_further = false;
+            break;
+          }
+        }
+
+        // Schedule basic block
+        if (need_process_further)
+          Q.push(*succ);
+      }
+  }
+
+  return true;
+}
+
+static bool CheckSpillAfterDef(MachineInstr *start, unsigned reg,
+                               bool &canBeSpilled) {
+  MachineBasicBlock *MBB = start->getParent();
+  MachineBasicBlock::reverse_iterator B(start);
+  MachineBasicBlock::reverse_iterator E = MBB->rend();
+  if (E == B)
+    return false; // empty block check
+  ++B; // skip branch instr itself
+  for (MachineBasicBlock::reverse_iterator I = B; I != E; ++I) {
+    if (I->definesRegister(reg)) {
+      return true;
+    }
+    if (I->readsRegister(reg) && (HSAIL::isConv(&*I) || I->mayStore())) {
+      canBeSpilled = true;
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool IsSpilledAfterDef(MachineInstr *start, unsigned reg) {
+  bool canBeSpilled = false;
+  if (!CheckSpillAfterDef(start, reg, canBeSpilled)) {
+    std::queue<MachineBasicBlock *> Q;
+    SmallPtrSet Visited;
+    MachineBasicBlock *MBB = start->getParent();
+    Q.push(MBB);
+    while (!Q.empty() && !canBeSpilled) {
+      MachineBasicBlock *cur_mbb = Q.front();
+      Q.pop();
+      for (MachineBasicBlock::pred_iterator pred = cur_mbb->pred_begin(),
+                                            pred_end = cur_mbb->pred_end();
+           pred != pred_end; ++pred) {
+        if (!Visited.count(*pred) && !(*pred)->empty()) {
+          Visited.insert(*pred);
+          MachineInstr *instr;
+          MachineBasicBlock::instr_iterator termIt =
+              (*pred)->getFirstInstrTerminator();
+          if (termIt == (*pred)->instr_end()) {
+            instr = &*(*pred)->rbegin();
+          } else {
+            instr = termIt;
+          }
+          if (!CheckSpillAfterDef(instr, reg, canBeSpilled)) {
+            Q.push(*pred);
+          }
+        }
+      }
+    }
+  }
+  return canBeSpilled;
+}
+
+bool HSAILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *&TBB,
+                                   MachineBasicBlock *&FBB,
+                                   SmallVectorImpl<MachineOperand> &Cond,
+                                   bool AllowModify) const {
+  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  if (DisableBranchAnalysis)
+    return true;
+
+  // Start from the bottom of the block and work up, examining the
+  // terminator instructions.
+  MachineBasicBlock::iterator I = MBB.end();
+  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+
+    // Working from the bottom, when we see a non-terminator instruction, we're
+    // done.
+    if (!isUnpredicatedTerminator(I))
+      break;
+
+    // A terminator that isn't a branch can't easily be handled by this
+    // analysis.
+    if (!I->getDesc().isBranch())
+      return true;
+
+    // Handle unconditional branches.
+    if (I->getOpcode() == HSAIL::BR) {
+      int Src0Idx = HSAIL::getNamedOperandIdx(HSAIL::BR, HSAIL::OpName::src0);
+      UnCondBrIter = I;
+
+      Cond.clear();
+      FBB = 0;
+
+      if (!AllowModify) {
+        TBB = I->getOperand(Src0Idx).getMBB();
+        continue;
+      }
+
+      // If the block has any instructions after a JMP, delete them.
+      while (std::next(I) != MBB.end())
+        std::next(I)->eraseFromParent();
+
+      // Delete the JMP if it's equivalent to a fall-through.
+      if (MBB.isLayoutSuccessor(I->getOperand(Src0Idx).getMBB())) {
+        TBB = 0;
+        I->eraseFromParent();
+        I = MBB.end();
+        UnCondBrIter = MBB.end();
+        continue;
+      }
+
+      // TBB is used to indicate the unconditional destination.
+      TBB = I->getOperand(Src0Idx).getMBB();
+      continue;
+    }
+
+    // Handle conditional branches.
+
+    // First conditional branch.
+    if (Cond.empty()) {
+      int Src0Idx = HSAIL::getNamedOperandIdx(HSAIL::CBR, HSAIL::OpName::src0);
+      int Src1Idx = HSAIL::getNamedOperandIdx(HSAIL::CBR, HSAIL::OpName::src1);
+
+      FBB = TBB;
+      TBB = I->getOperand(Src1Idx).getMBB();
+
+      // Insert the condition as a pair - (register, reverse flag) -
+      // or, in case there are dependencies, as
+      // (register, COND_REVERSE_DEPENDANT, free reg num, reverse flag).
+      Cond.push_back(I->getOperand(Src0Idx));
+
+      if (DisableCondReversion) {
+        Cond.push_back(MachineOperand::CreateImm(COND_IRREVERSIBLE));
+        continue;
+      }
+
+      // Determine condition dependencies.
+      unsigned reg = I->getOperand(Src0Idx).getReg();
+      bool can_reverse = false;
+      bool is_def_before_use = IsDefBeforeUse(MBB, reg, MRI, can_reverse);
+      if (can_reverse) {
+        /* Here we're taking care of a possible control register spill
+           that occurs between its definition and the branch. If it does,
+           we're not allowed to invert the branch because some other place
+           relies on the unspilled value.
+        */
+        can_reverse = !IsSpilledAfterDef(I, reg);
+      }
+      // Cannot reverse an instruction which would require inserting or
+      // removing a 'not_b1' inside a loop.
+      // Also, we avoid reversing for those comparisons
+      // whose result is spilled in between the definition and use.
+      if (!can_reverse) {
+        Cond.push_back(MachineOperand::CreateImm(COND_IRREVERSIBLE));
+        continue;
+      }
+
+      // If there are no uses of the condition register we can just reverse
+      // the instruction and be fine.
+      if (is_def_before_use) {
+        Cond.push_back(MachineOperand::CreateImm(COND_REVERSE_POSITIVE));
+        continue;
+      }
+
+      // There are uses of this instruction somewhere down the control flow.
+      // Try to use the RegisterScavenger to get a free register;
+      // if there is none, do not invert the condition.
+      if (!MRI.tracksLiveness()) {
+        Cond.push_back(MachineOperand::CreateImm(COND_IRREVERSIBLE));
+        continue;
+      }
+
+      unsigned free_reg = 0;
+      if (!TargetRegisterInfo::isVirtualRegister(Cond[0].getReg())) {
+        RS->enterBasicBlock(&MBB);
+        RS->forward(std::prev(MBB.end()));
+
+        free_reg = RS->FindUnusedReg(&HSAIL::CRRegClass);
+        if (free_reg == 0) {
+          Cond.push_back(MachineOperand::CreateImm(COND_IRREVERSIBLE));
+          continue;
+        }
+        RS->setRegUsed(free_reg);
+      }
+
+      // Everything is OK - mark the condition as reversible.
+      Cond.push_back(MachineOperand::CreateImm(COND_REVERSE_DEPENDANT));
+      Cond.push_back(MachineOperand::CreateImm(free_reg));
+      Cond.push_back(MachineOperand::CreateImm(COND_REVERSE_POSITIVE));
+      continue;
+    }
+
+    // Cannot handle more than one conditional branch.
+    return true;
+  }
+
+  return false;
+}
+
+static BrigCompareOperation invIntCondOp(BrigCompareOperation Op) {
+  switch (Op) {
+  case BRIG_COMPARE_EQ:
+    return BRIG_COMPARE_NE;
+  case BRIG_COMPARE_GE:
+    return BRIG_COMPARE_LT;
+  case BRIG_COMPARE_GT:
+    return BRIG_COMPARE_LE;
+  case BRIG_COMPARE_LE:
+    return BRIG_COMPARE_GT;
+  case BRIG_COMPARE_LT:
+    return BRIG_COMPARE_GE;
+  case BRIG_COMPARE_NE:
+    return BRIG_COMPARE_EQ;
+  default:
+    return Op;
+  }
+}
+
+static BrigCompareOperation invFPCondOp(BrigCompareOperation Op) {
+  switch (Op) {
+  case BRIG_COMPARE_NUM:
+    return BRIG_COMPARE_NAN;
+  case BRIG_COMPARE_EQ:
+    return BRIG_COMPARE_NEU;
+  case BRIG_COMPARE_GE:
+    return BRIG_COMPARE_LTU;
+  case BRIG_COMPARE_GT:
+    return BRIG_COMPARE_LEU;
+  case BRIG_COMPARE_LE:
+    return BRIG_COMPARE_GTU;
+  case BRIG_COMPARE_LT:
+    return BRIG_COMPARE_GEU;
+  case BRIG_COMPARE_NE:
+    return BRIG_COMPARE_EQU;
+  case BRIG_COMPARE_EQU:
+    return BRIG_COMPARE_NE;
+  case BRIG_COMPARE_GEU:
+    return BRIG_COMPARE_LT;
+  case BRIG_COMPARE_GTU:
+    return BRIG_COMPARE_LE;
+  case BRIG_COMPARE_LEU:
+    return BRIG_COMPARE_GT;
+  case BRIG_COMPARE_LTU:
+    return BRIG_COMPARE_GE;
+  case BRIG_COMPARE_NEU:
+    return BRIG_COMPARE_EQ;
+  case BRIG_COMPARE_NAN:
+    return BRIG_COMPARE_NUM;
+  default:
+    return Op;
+  }
+}
+
+static bool isFPBrigType(BrigType BT) {
+  switch (BT) {
+  case BRIG_TYPE_F32:
+  case BRIG_TYPE_F64:
+  case BRIG_TYPE_F16:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Helper for `HSAILInstrInfo::InsertBranch`.
+// Reverses the branch condition. Different from
+// `HSAILInstrInfo::ReverseBranchCondition` because it actually generates
+// the reversion code. Returns the register holding the condition result.
+static unsigned GenerateBranchCondReversion(MachineBasicBlock &MBB,
+                                            const MachineOperand &CondOp,
+                                            const HSAILInstrInfo *TII,
+                                            DebugLoc DL) {
+  assert(CondOp.isReg());
+  unsigned cond_reg = CondOp.getReg();
+
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  MachineInstr *cond_expr = nullptr;
+
+  // Manually search for the latest usage of the condition register in MBB.
+  MachineBasicBlock::iterator I = MBB.end();
+
+  while (I != MBB.begin()) {
+    --I;
+
+    if (I->definesRegister(cond_reg)) {
+      cond_expr = &*I;
+      break;
+    }
+  }
+
+  // If condition is compare instruction -
+  // If the condition is a compare instruction, reverse it.
+  bool need_insert_not = false;
+  if (cond_expr && cond_expr->isCompare()) {
+    MachineOperand *CmpOp = TII->getNamedOperand(*cond_expr, HSAIL::OpName::op);
+
+    BrigType CmpType = static_cast<BrigType>(
+        TII->getNamedOperand(*cond_expr, HSAIL::OpName::sourceType)->getImm());
+
+    BrigCompareOperation OrigOp =
+        static_cast<BrigCompareOperation>(CmpOp->getImm());
+
+    BrigCompareOperation RevOp =
+        isFPBrigType(CmpType) ? invFPCondOp(OrigOp) : invIntCondOp(OrigOp);
+
+    if (OrigOp != RevOp) // Can invert the operation.
+      CmpOp->setImm(RevOp);
+    else
+      need_insert_not = true;
+  }
+  // If the condition is a logical not, just remove it.
+  else if (cond_expr && cond_expr->getOpcode() == HSAIL::NOT_B1) {
+    cond_reg = cond_expr->getOperand(1).getReg();
+    cond_expr->eraseFromParent();
+  } else
+    need_insert_not = true;
+
+  // Otherwise insert a logical not.
+  if (need_insert_not) {
+    // If we are before register allocation we need to maintain SSA form.
+    if (TargetRegisterInfo::isVirtualRegister(CondOp.getReg()))
+      cond_reg = MRI.createVirtualRegister(MRI.getRegClass(CondOp.getReg()));
+
+    BuildMI(&MBB, DL, TII->get(HSAIL::NOT_B1))
+        .addReg(cond_reg, RegState::Define)
+        .addReg(CondOp.getReg())
+        .addImm(BRIG_TYPE_B1);
+  }
+
+  return cond_reg;
+}
+
+unsigned HSAILInstrInfo::InsertBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  if (Cond.empty()) {
+    // Unconditional branch.
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, DL, get(HSAIL::BR))
+        .addImm(BRIG_WIDTH_ALL)
+        .addMBB(TBB)
+        .addImm(BRIG_TYPE_NONE);
+    return 1;
+  }
+
+  // AnalyzeBranch can handle only one condition.
+  if (Cond.size() != 2 && Cond.size() != 4)
+    return 0;
+
+  // Conditional branch.
+  // According to the HSAIL spec the condition MUST be a control register.
+  assert(Cond[0].isReg());
+  unsigned cond_reg = Cond[0].getReg();
+
+  // Reverse the condition if requested.
+  switch (static_cast<CondReverseFlag>(Cond[1].getImm())) {
+  case COND_REVERSE_DEPENDANT:
+    assert(Cond.size() == 4 && Cond[2].isImm());
+
+    if (Cond[3].getImm() == COND_REVERSE_NEGATIVE) {
+      MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+      if (TargetRegisterInfo::isVirtualRegister(Cond[0].getReg()))
+        cond_reg = MRI.createVirtualRegister(MRI.getRegClass(Cond[0].getReg()));
+      else
+        cond_reg = Cond[2].getImm();
+
+      BuildMI(&MBB, DL, get(HSAIL::NOT_B1))
+          .addReg(cond_reg, RegState::Define)
+          .addReg(Cond[0].getReg())
+          .addImm(BRIG_TYPE_B1);
+    }
+
+    break;
+  case COND_REVERSE_NEGATIVE:
+    cond_reg = GenerateBranchCondReversion(MBB, Cond[0], this, DL);
+    break;
+  case COND_REVERSE_POSITIVE:
+  case COND_IRREVERSIBLE:
+    // Do nothing.
+    break;
+  }
+
+  unsigned Count = 0;
+
+  BuildMI(&MBB, DL, get(HSAIL::CBR))
+      .addImm(BRIG_WIDTH_1)
+      .addReg(cond_reg)
+      .addMBB(TBB)
+      .addImm(BRIG_TYPE_B1);
+
+  ++Count;
+
+  if (FBB) {
+    // Two-way conditional branch. Insert the second branch.
+    BuildMI(&MBB, DL, get(HSAIL::BR))
+        .addImm(BRIG_WIDTH_ALL)
+        .addMBB(FBB)
+        .addImm(BRIG_TYPE_NONE);
+
+    ++Count;
+  }
+
+  return Count;
+}
+
+unsigned int HSAILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0;
+
+  while (I != MBB.begin()) {
+    --I;
+    if (I->isDebugValue())
+      continue;
+
+    if (I->getOpcode() != HSAIL::BR && I->getOpcode() != HSAIL::CBR)
+      break;
+
+    // Remove the branch.
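+    // Erasing the branch invalidates the iterator, so the scan restarts
+    // from MBB.end() after each removal.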
+    I->eraseFromParent();
+    I = MBB.end();
+    ++Count;
+  }
+
+  return Count;
+}
+
+void HSAILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                         unsigned SrcReg, bool isKill,
+                                         int FrameIndex,
+                                         const TargetRegisterClass *RC,
+                                         const TargetRegisterInfo *TRI) const {
+  unsigned int Opc = 0;
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  DebugLoc DL;
+
+  unsigned BT;
+  switch (RC->getID()) {
+  default:
+    llvm_unreachable("unrecognized TargetRegisterClass");
+    break;
+  case HSAIL::GPR32RegClassID:
+    Opc = HSAIL::ST_U32;
+    BT = BRIG_TYPE_U32;
+    break;
+  case HSAIL::GPR64RegClassID:
+    Opc = HSAIL::ST_U64;
+    BT = BRIG_TYPE_U64;
+    break;
+  case HSAIL::CRRegClassID: {
+    HSAILMachineFunctionInfo *MFI = MF.getInfo<HSAILMachineFunctionInfo>();
+    MFI->setHasSpilledCRs();
+    Opc = HSAIL::SPILL_B1;
+    BT = BRIG_TYPE_B1;
+    break;
+  }
+  }
+  if (MI != MBB.end()) {
+    DL = MI->getDebugLoc();
+  }
+
+  switch (RC->getID()) {
+  default:
+    llvm_unreachable("unrecognized TargetRegisterClass");
+    break;
+  case HSAIL::CRRegClassID:
+  case HSAIL::GPR32RegClassID:
+  case HSAIL::GPR64RegClassID: {
+    MachineMemOperand *MMO = MF.getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(FrameIndex),
+        MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
+        MFI.getObjectAlignment(FrameIndex));
+
+    BuildMI(MBB, MI, DL, get(Opc))
+        .addReg(SrcReg, getKillRegState(isKill)) // src
+        .addFrameIndex(FrameIndex)               // address_base
+        .addReg(HSAIL::NoRegister)               // address_reg
+        .addImm(0)                               // address_offset
+        .addImm(BT)                              // TypeLength
+        .addImm(HSAILAS::SPILL_ADDRESS)          // segment
+        .addImm(MMO->getAlignment())             // align
+        .addMemOperand(MMO);
+    break;
+  }
+  }
+}
+
+void HSAILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MI,
+                                          unsigned DestReg, int FrameIndex,
+                                          const TargetRegisterClass *RC,
+                                          const TargetRegisterInfo *TRI) const {
+  unsigned int Opc = 0;
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  DebugLoc DL;
+
+  unsigned BT;
+  switch (RC->getID()) {
+  default:
+    llvm_unreachable("unrecognized TargetRegisterClass");
+    break;
+  case HSAIL::GPR32RegClassID:
+    Opc = HSAIL::LD_U32;
+    BT = BRIG_TYPE_U32;
+    break;
+  case HSAIL::GPR64RegClassID:
+    Opc = HSAIL::LD_U64;
+    BT = BRIG_TYPE_U64;
+    break;
+  case HSAIL::CRRegClassID:
+    Opc = HSAIL::RESTORE_B1;
+    BT = BRIG_TYPE_B1;
+    break;
+  }
+  if (MI != MBB.end()) {
+    DL = MI->getDebugLoc();
+  }
+
+  switch (RC->getID()) {
+  default:
+    llvm_unreachable("unrecognized TargetRegisterClass");
+    break;
+  case HSAIL::GPR32RegClassID:
+  case HSAIL::GPR64RegClassID:
+  case HSAIL::CRRegClassID: {
+    MachineMemOperand *MMO = MF.getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(FrameIndex),
+        MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
+        MFI.getObjectAlignment(FrameIndex));
+    BuildMI(MBB, MI, DL, get(Opc))
+        .addReg(DestReg, RegState::Define) // dest
+        .addFrameIndex(FrameIndex)         // address_base
+        .addReg(HSAIL::NoRegister)         // address_reg
+        .addImm(0)                         // address_offset
+        .addImm(BT)                        // TypeLength
+        .addImm(HSAILAS::SPILL_ADDRESS)    // segment
+        .addImm(MMO->getAlignment())       // align
+        .addImm(BRIG_WIDTH_1)              // width
+        .addImm(0)                         // mask
+        .addMemOperand(MMO);
+    break;
+  }
+  }
+}
+
+bool HSAILInstrInfo::areLoadsFromSameBasePtr(SDNode *Node1, SDNode *Node2,
+                                             int64_t &Offset1,
+                                             int64_t &Offset2) const {
+  // Warning! This function handles not only load nodes but store nodes too,
+  // because there is no real difference between the memory operands of loads
+  // and stores.
+  // Do not change the name of this function, to avoid more changes in core
+  // LLVM.
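+  // Two nodes qualify when each carries exactly one memory operand and the
+  // operands agree on size, address space, underlying IR value and flags;
+  // only the offsets may differ, and those are reported back to the caller.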
+
+  if (!Node1->isMachineOpcode() || !Node2->isMachineOpcode())
+    return false;
+
+  MachineSDNode *mnode1 = cast<MachineSDNode>(Node1),
+                *mnode2 = cast<MachineSDNode>(Node2);
+
+  if (mnode1->memoperands_empty() || mnode2->memoperands_empty())
+    return false;
+
+  if ((mnode1->memoperands_begin() + 1) != mnode1->memoperands_end() ||
+      (mnode2->memoperands_begin() + 1) != mnode2->memoperands_end())
+    return false;
+
+  MachineMemOperand *mo1, *mo2;
+
+  mo1 = *mnode1->memoperands_begin();
+  mo2 = *mnode2->memoperands_begin();
+
+  // TODO_HSA: Consider extension types to be checked explicitly.
+  if (mo1->getSize() != mo2->getSize() ||
+      mo1->getPointerInfo().getAddrSpace() !=
+          mo2->getPointerInfo().getAddrSpace() ||
+      mo1->getValue() != mo2->getValue() ||
+      mo1->getFlags() != mo2->getFlags()) {
+    return false;
+  }
+
+  Offset1 = mo1->getOffset();
+  Offset2 = mo2->getOffset();
+
+  return true;
+}
+
+bool HSAILInstrInfo::shouldScheduleLoadsNear(SDNode *Node1, SDNode *Node2,
+                                             int64_t Offset1, int64_t Offset2,
+                                             unsigned NumLoads) const {
+  // Warning! This function handles not only load nodes but store nodes too,
+  // because there is no real difference between the memory operands of loads
+  // and stores.
+
+  // Assume that 'areLoadsFromSameBasePtr' returned true.
+
+  if (!Node1->isMachineOpcode())
+    return false;
+
+  MachineSDNode *mnode1 = cast<MachineSDNode>(Node1);
+
+  // Check that the loads are close enough.
+  if (Offset2 - Offset1 <=
+      4 * (int64_t)(*mnode1->memoperands_begin())->getSize())
+    return true;
+  return false;
+}
+
+bool HSAILInstrInfo::ReverseBranchCondition(
+    SmallVectorImpl<MachineOperand> &Cond) const {
+  if (Cond.size() < 2)
+    return true;
+
+  // AnalyzeBranch should always return conditions as pairs.
+  assert(Cond.size() % 2 == 0);
+
+  for (SmallVectorImpl<MachineOperand>::iterator I = Cond.begin(),
+                                                 E = Cond.end();
+       I != E; ++I) {
+    ++I;
+    if (static_cast<CondReverseFlag>(I->getImm()) == COND_IRREVERSIBLE)
+      return true;
+  }
+
+  for (SmallVectorImpl<MachineOperand>::iterator I = Cond.begin(),
+                                                 E = Cond.end();
+       I != E; ++I) {
+    ++I;
+
+    assert(I->isImm());
+
+    CondReverseFlag cond_rev_flag = static_cast<CondReverseFlag>(I->getImm());
+
+    switch (cond_rev_flag) {
+    case COND_REVERSE_POSITIVE:
+      cond_rev_flag = COND_REVERSE_NEGATIVE;
+      break;
+    case COND_REVERSE_NEGATIVE:
+      cond_rev_flag = COND_REVERSE_POSITIVE;
+      break;
+    case COND_REVERSE_DEPENDANT:
+      cond_rev_flag = COND_REVERSE_DEPENDANT;
+      break;
+    default:
+      llvm_unreachable("Unknown cond reverse flag");
+    }
+
+    I->setImm(cond_rev_flag);
+  }
+
+  return false;
+}
+
+bool HSAILInstrInfo::isSafeToMoveRegClassDefs(
+    const TargetRegisterClass *RC) const {
+  // Micah: HSAIL does not have any constraints about moving defs.
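+  // This hook is queried by passes such as MachineLICM when deciding whether
+  // instructions defining registers of this class can be hoisted freely.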
+ return true; +} + +void HSAILInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (HSAIL::GPR32RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(HSAIL::MOV_B32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(BRIG_TYPE_B32); + return; + } + + if (HSAIL::GPR64RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(HSAIL::MOV_B64), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(BRIG_TYPE_B64); + return; + } + + if (HSAIL::CRRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(HSAIL::MOV_B1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(BRIG_TYPE_B1); + return; + } + + unsigned SrcBT = -1; + unsigned DestBT = -1; + unsigned CvtOpc = -1; + + if (HSAIL::GPR32RegClass.contains(DestReg) && + HSAIL::CRRegClass.contains(SrcReg)) { + DestBT = BRIG_TYPE_B1; + SrcBT = BRIG_TYPE_U32; + CvtOpc = HSAIL::CVT_B1_U32; + } else if (HSAIL::CRRegClass.contains(DestReg) && + HSAIL::GPR32RegClass.contains(SrcReg)) { + DestBT = BRIG_TYPE_U32; + SrcBT = BRIG_TYPE_B1; + CvtOpc = HSAIL::CVT_U32_B1; + } else if (HSAIL::GPR64RegClass.contains(DestReg) && + HSAIL::GPR32RegClass.contains(SrcReg)) { + DestBT = BRIG_TYPE_U32; + SrcBT = BRIG_TYPE_U64; + CvtOpc = HSAIL::CVT_U32_U64; + } else if (HSAIL::GPR32RegClass.contains(DestReg) && + HSAIL::GPR64RegClass.contains(SrcReg)) { + // Truncation can occur if a function was defined with different return + // types in different places. + DestBT = BRIG_TYPE_U64; + SrcBT = BRIG_TYPE_U32; + CvtOpc = HSAIL::CVT_U64_U32; + } else { + assert(!"When do we hit this?"); + return TargetInstrInfo::copyPhysReg(MBB, MI, DL, DestReg, SrcReg, KillSrc); + } + + BuildMI(MBB, MI, DL, get(CvtOpc), DestReg) + .addImm(0) // ftz + .addImm(0) // round + .addImm(DestBT) // destTypedestLength + .addImm(SrcBT) // srcTypesrcLength + .addReg(SrcReg, getKillRegState(KillSrc)); +} + +bool HSAILInstrInfo::expandPostRAPseudo( + MachineBasicBlock::iterator MBBI) const { + MachineInstr &MI = *MBBI; + return HSAILGenInstrInfo::expandPostRAPseudo(MI); +} + +const TargetRegisterClass * +HSAILInstrInfo::getOpRegClass(const MachineRegisterInfo &MRI, + const MachineInstr &MI, unsigned OpNo) const { + + const MachineOperand &MO = MI.getOperand(OpNo); + if (!MO.isReg()) + return nullptr; + + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return MRI.getRegClass(Reg); + + return RI.getPhysRegClass(Reg); +} + +bool HSAILInstrInfo::verifyInstruction(const MachineInstr *MI, + StringRef &ErrInfo) const { + return true; +} + +MachineOperand *HSAILInstrInfo::getNamedOperand(MachineInstr &MI, + unsigned OperandName) const { + int Idx = HSAIL::getNamedOperandIdx(MI.getOpcode(), OperandName); + if (Idx == -1) + return nullptr; + + return &MI.getOperand(Idx); +} +} + +// FIXME: Should just use generated version directly. +int HSAIL::getVectorLdStOpcode(uint16_t Opcode, unsigned vsize) { + // HSAIL::vec_size enum is generated from instruction mappings and defined in + // HSAILGenInstrInfo.inc. It starts with vec_size_1 value which is equal to + // zero, so we need to subtract one from size. 
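+  // For example, requesting the 4-wide form of an opcode passes vsize == 4
+  // here and looks up HSAIL::vec_size(3) in the generated mapping table.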
+ return HSAIL::getLdStVectorOpcode(Opcode, HSAIL::vec_size(vsize - 1)); +} Index: lib/Target/HSAIL/HSAILInstrInfo.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILInstrInfo.td @@ -0,0 +1,434 @@ +//==- HSAILInstrInfo.td - Main HSAIL Instruction Definition -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the HSAIL instruction set, defining the instructions, and +// properties of the instructions which are needed for code generation, machine +// code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +def SmallModel : Predicate<"Subtarget->isSmallModel()">; +def LargeModel : Predicate<"Subtarget->isLargeModel()">; +def EnableOpt : Predicate<"CodeGenOpt::None != TM.getOptLevel()">; + +// Mark the default value for a width modifier. This only effects how +// the instruction is printed in the non-BRIG path. +class WidthAttrValues_ { + bits<2> NONE = 0; + bits<2> ALL = 1; + bits<2> WAVESIZE = 2; + bits<2> ONE = 3; +} + +def WidthAttrValues : WidthAttrValues_; + + +class AtomicNoRet { + string NoRetOp = noRetOp; + bit IsRet = isRet; +} + +// Maps an atomic opcode to its version with a return value. +def getAtomicRetOp : InstrMapping { + let FilterClass = "AtomicNoRet"; + let RowFields = ["NoRetOp"]; + let ColFields = ["IsRet"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + +// Maps an atomic opcode to its returnless version. +def getAtomicNoRetOp : InstrMapping { + let FilterClass = "AtomicNoRet"; + let RowFields = ["NoRetOp"]; + let ColFields = ["IsRet"]; + let KeyCol = ["1"]; + let ValueCols = [["0"]]; +} + +class LdStVectorMap { + string opcode = op; + int vec_size = size; +} + +def getLdStVectorOpcode : InstrMapping { + let FilterClass = "LdStVectorMap"; + let RowFields = ["opcode"]; + let ColFields = ["vec_size"]; + let KeyCol = ["1"]; + let ValueCols = [["1"], ["2"], ["3"], ["4"]]; +} + +class HSAILDestOperand : RegisterOperand ; + +// Normal source operand which can be an immediate or a register. 
+class HSAILSrcOperand : RegisterOperand { + let OperandNamespace = "HSAIL"; + let OperandType = "OPERAND_REG_IMM"; +} + +def HSAILDest1Operand : HSAILDestOperand; +def HSAILDest32Operand : HSAILDestOperand; +def HSAILDest64Operand : HSAILDestOperand; + +def HSAILSrc1Operand : HSAILSrcOperand; +def HSAILSrc32Operand : HSAILSrcOperand; +def HSAILSrc64Operand : HSAILSrcOperand; + + +class getRegOpForVT { + RegisterOperand ret = !if(!eq(VT.Size, 32), HSAILSrc32Operand, + !if(!eq(VT.Size, 64), HSAILSrc64Operand, + HSAILSrc1Operand)); // else VT == i1 +} + +class getDestRegOpForVT { + RegisterOperand ret = !if(!eq(VT.Size, 32), HSAILDest32Operand, + !if(!eq(VT.Size, 64), HSAILDest64Operand, + HSAILDest1Operand)); // else VT == i1 +} + +class getRegClassForVT { + RegisterClass ret = !if(!eq(VT.Size, 32), GPR32, + !if(!eq(VT.Size, 64), GPR64, + CR)); // else VT == i1 +} + +class getDestRegClassForVT { + RegisterClass ret = !if(!eq(VT.Size, 32), GPR32, + !if(!eq(VT.Size, 64), GPR64, + CR)); // else VT == i1 +} + +class BRIGType { + field int BT = bt; + field ValueType VT = vt; + field string Name = name; + field string InstName = instName; + field RegisterClass SrcRC = getRegClassForVT.ret; +} + +class HSAILOperand : RegisterOperand { + field BRIGType BT = ty; + field ValueType VT = ty.VT; + let PrintMethod = pm; + + let OperandNamespace = "HSAIL"; + let OperandType = "OPERAND_REG_IMM"; +} + +class HSAILProfile ArgBT> { + field ValueType DestVT = ArgBT[0].VT; + field ValueType Src0VT = ArgBT[1].VT; + field ValueType Src1VT = ArgBT[2].VT; + field ValueType Src2VT = ArgBT[3].VT; + field ValueType Src3VT = ArgBT[4].VT; + + field HSAILOperand DestRC = ArgBT[0]; + field HSAILOperand Src0RC = ArgBT[1]; + field HSAILOperand Src1RC = ArgBT[2]; + field HSAILOperand Src2RC = ArgBT[3]; + field HSAILOperand Src3RC = ArgBT[4]; +} + +//===----------------------------------------------------------------------===// +// Custom Operands +//===----------------------------------------------------------------------===// +include "HSAILOperands.td" +include "HSAILEnums.td" + +def UntypedTy : BRIGType; + +def B1Ty : BRIGType; +def B32Ty : BRIGType; +def B64Ty : BRIGType; + +def S32Ty : BRIGType; +def S64Ty : BRIGType; + +def U32Ty : BRIGType; +def U64Ty : BRIGType; + +// Deal with cases that still assume f16 is an i32. 
+def F16Ty_i32 : BRIGType; +def F16Ty_f32 : BRIGType; + +def F16Ty : BRIGType; +def F32Ty : BRIGType; +def F64Ty : BRIGType; + +def U8X4Ty : BRIGType; +def U8X8Ty : BRIGType; + +def S8X4Ty : BRIGType; +def S8X8Ty : BRIGType; + +def U16X2Ty : BRIGType; + +def U16X4Ty : BRIGType; +def S16X4Ty : BRIGType; + +def U32X2Ty : BRIGType; +def S32X2Ty : BRIGType; + +def F16X2Ty : BRIGType; +def F16X4Ty : BRIGType; + +def UntypedOp : HSAILOperand; + +def B1Op : HSAILOperand; +def B32Op : HSAILOperand; +def B64Op : HSAILOperand; + +def F16Op : HSAILOperand; + +def S32Op : HSAILOperand; +def U32Op : HSAILOperand; +def F32Op : HSAILOperand; + +def S64Op : HSAILOperand; +def U64Op : HSAILOperand; +def F64Op : HSAILOperand; + +def U8X4Op : HSAILOperand; +def U8X8Op : HSAILOperand; + +def S8X4Op : HSAILOperand; +def S8X8Op : HSAILOperand; + +def U16X2Op : HSAILOperand; + +def U16X4Op : HSAILOperand; +def S16X4Op : HSAILOperand; + +def U32X2Op : HSAILOperand; +def S32X2Op : HSAILOperand; + +def F16X2Op : HSAILOperand; +def F16X4Op : HSAILOperand; + +def Vec2SrcOpU32 : Operand { + let MIOperandInfo = (ops U32Op, U32Op); + let PrintMethod = "printV2U32"; +} + +def Vec2SrcOpF32 : Operand { + let MIOperandInfo = (ops F32Op, F32Op); + let PrintMethod = "printV2F32"; +} + +def Vec2SrcOpU64 : Operand { + let MIOperandInfo = (ops U64Op, U64Op); + let PrintMethod = "printV2U64"; +} + +def Vec2SrcOpF64 : Operand { + let MIOperandInfo = (ops F64Op, F64Op); + let PrintMethod = "printV2F64"; +} + + +def Vec3SrcOpU32 : Operand { + let MIOperandInfo = (ops U32Op, U32Op, U32Op); + let PrintMethod = "printV3U32"; +} + +def Vec3SrcOpF32 : Operand { + let MIOperandInfo = (ops F32Op, F32Op, F32Op); + let PrintMethod = "printV3F32"; +} + +def Vec3SrcOpU64 : Operand { + let MIOperandInfo = (ops U64Op, U64Op, U64Op); + let PrintMethod = "printV3U64"; +} + +def Vec3SrcOpF64 : Operand { + let MIOperandInfo = (ops F64Op, F64Op, F64Op); + let PrintMethod = "printV3F64"; +} + + +def Vec4SrcOpU32 : Operand { + let MIOperandInfo = (ops U32Op, U32Op, U32Op, U32Op); + let PrintMethod = "printV4U32"; +} + +def Vec4SrcOpF32 : Operand { + let MIOperandInfo = (ops F32Op, F32Op, F32Op, F32Op); + let PrintMethod = "printV4F32"; +} + +def Vec4SrcOpU64 : Operand { + let MIOperandInfo = (ops U64Op, U64Op, U64Op, U64Op); + let PrintMethod = "printV4U64"; +} + +def Vec4SrcOpF64 : Operand { + let MIOperandInfo = (ops F64Op, F64Op, F64Op, F64Op); + let PrintMethod = "printV4F64"; +} + + +def Inst_Void : HSAILProfile<[UntypedOp, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_S32_S32 : HSAILProfile<[S32Op, S32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_S32_S32_S32 : HSAILProfile<[S32Op, S32Op, S32Op, UntypedOp, UntypedOp]>; +def Inst_S32_S32_S32_S32 : HSAILProfile<[S32Op, S32Op, S32Op, S32Op, UntypedOp]>; + +def Inst_S64_S64 : HSAILProfile<[S64Op, S64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_S64_S64_S64 : HSAILProfile<[S64Op, S64Op, S64Op, UntypedOp, UntypedOp]>; +def Inst_S64_S64_S64_S64 : HSAILProfile<[S64Op, S64Op, S64Op, S64Op, UntypedOp]>; + +def Inst_U32 : HSAILProfile<[U32Op, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_U32 : HSAILProfile<[U32Op, U32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_U32_U32 : HSAILProfile<[U32Op, U32Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_U32_U32_U32_U32 : HSAILProfile<[U32Op, U32Op, U32Op, U32Op, UntypedOp]>; + +def Inst_U64 : HSAILProfile<[U64Op, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U64_U64 : HSAILProfile<[U64Op, U64Op, UntypedOp, UntypedOp, 
UntypedOp]>; +def Inst_U64_U64_U64 : HSAILProfile<[U64Op, U64Op, U64Op, UntypedOp, UntypedOp]>; +def Inst_U64_U64_U64_U64 : HSAILProfile<[U64Op, U64Op, U64Op, U64Op, UntypedOp]>; + +def Inst_B1_B1 : HSAILProfile<[B1Op, B1Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B1_B1_B1 : HSAILProfile<[B1Op, B1Op, B1Op, UntypedOp, UntypedOp]>; +def Inst_B1_B1_B1_B1 : HSAILProfile<[B1Op, B1Op, B1Op, B1Op, UntypedOp]>; + +def Inst_B32_B32 : HSAILProfile<[B32Op, B32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B32_B32_B32 : HSAILProfile<[B32Op, B32Op, B32Op, UntypedOp, UntypedOp]>; +def Inst_B32_B32_B32_B32 : HSAILProfile<[B32Op, B32Op, B32Op, B32Op, UntypedOp]>; + +def Inst_B64_B64 : HSAILProfile<[B64Op, B64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B64_B64_B64 : HSAILProfile<[B64Op, B64Op, B64Op, UntypedOp, UntypedOp]>; +def Inst_B64_B64_B64_B64 : HSAILProfile<[B64Op, B64Op, B64Op, B64Op, UntypedOp]>; + +def Inst_F32_F32 : HSAILProfile<[F32Op, F32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_F32_F32_F32 : HSAILProfile<[F32Op, F32Op, F32Op, UntypedOp, UntypedOp]>; +def Inst_F32_F32_F32_F32 : HSAILProfile<[F32Op, F32Op, F32Op, F32Op, UntypedOp]>; + +def Inst_F64_F64 : HSAILProfile<[F64Op, F64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_F64_F64_F64 : HSAILProfile<[F64Op, F64Op, F64Op, UntypedOp, UntypedOp]>; +def Inst_F64_F64_F64_F64 : HSAILProfile<[F64Op, F64Op, F64Op, F64Op, UntypedOp]>; + +def Inst_B32_B1_B32_B32 : HSAILProfile<[B32Op, B1Op, B32Op, B32Op, UntypedOp, UntypedOp]>; +def Inst_B64_B1_B64_B64 : HSAILProfile<[B64Op, B1Op, B64Op, B64Op, UntypedOp, UntypedOp]>; + +def Inst_F32_B1_F32_F32 : HSAILProfile<[F32Op, B1Op, F32Op, F32Op, UntypedOp, UntypedOp]>; +def Inst_F64_B1_F64_F64 : HSAILProfile<[F64Op, B1Op, F64Op, F64Op, UntypedOp, UntypedOp]>; + +def Inst_U8X4_U8X4_U8X4_U8X4 : HSAILProfile<[U8X4Op, U8X4Op, U8X4Op, U8X4Op, UntypedOp]>; +def Inst_B64_B64_B32_B64 : HSAILProfile<[B64Op, B64Op, B32Op, B64Op, UntypedOp]>; + +def Inst_S64_S64_U32_U32 : HSAILProfile<[S64Op, S64Op, U32Op, U32Op, UntypedOp]>; +def Inst_U64_U64_U32_U32 : HSAILProfile<[U64Op, U64Op, U32Op, U32Op, UntypedOp]>; +def Inst_S32_S32_U32_U32 : HSAILProfile<[S32Op, S32Op, U32Op, U32Op, UntypedOp]>; + +def Inst_S64_S64_U32 : HSAILProfile<[S64Op, S64Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_U64_U64_U32 : HSAILProfile<[U64Op, U64Op, U32Op, UntypedOp, UntypedOp]>; + +def Inst_F32_F32_U32 : HSAILProfile<[F32Op, F32Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_F64_F64_U32 : HSAILProfile<[F64Op, F64Op, U32Op, UntypedOp, UntypedOp]>; + +def Inst_U32_B64 : HSAILProfile<[U32Op, B64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_B32 : HSAILProfile<[U32Op, B32Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_U32_S64 : HSAILProfile<[U32Op, S64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_U64 : HSAILProfile<[U32Op, U64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_S32 : HSAILProfile<[U32Op, S32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U64_U32 : HSAILProfile<[U64Op, U32Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_U32_B1 : HSAILProfile<[U32Op, B1Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B1_U32 : HSAILProfile<[B1Op, U32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B1_U64 : HSAILProfile<[B1Op, U64Op, UntypedOp, UntypedOp, UntypedOp]>; + + +def Inst_U8X4_U8X4_U32_U32 : HSAILProfile<[U8X4Op, U8X4Op, U32Op, U32Op, UntypedOp]>; +def Inst_S8X4_S8X4_S32_U32 : HSAILProfile<[S8X4Op, S8X4Op, S32Op, U32Op, UntypedOp]>; + +def Inst_U8X8_U8X8_U32_U32 : HSAILProfile<[U8X8Op, U8X8Op, U32Op, U32Op, 
UntypedOp]>; +def Inst_S8X8_S8X8_S32_U32 : HSAILProfile<[S8X8Op, S8X8Op, S32Op, U32Op, UntypedOp]>; + +def Inst_U16X4_U16X4_U32_U32 : HSAILProfile<[U16X4Op, U16X4Op, U32Op, U32Op, UntypedOp]>; +def Inst_S16X4_S16X4_S32_U32 : HSAILProfile<[S16X4Op, S16X4Op, S32Op, U32Op, UntypedOp]>; + +def Inst_U32X2_U32X2_U32_U32 : HSAILProfile<[U32X2Op, U32X2Op, U32Op, U32Op, UntypedOp]>; +def Inst_S32X2_S32X2_S32_U32 : HSAILProfile<[S32X2Op, S32X2Op, S32Op, U32Op, UntypedOp]>; + +def Inst_F16X2_F16X2_F16_U32 : HSAILProfile<[F16X2Op, F16X2Op, F16Op, U32Op, UntypedOp]>; +def Inst_F16X4_F16X4_F16_U32 : HSAILProfile<[F16X4Op, F16X4Op, F16Op, U32Op, UntypedOp]>; + + +def Inst_U8X4_F32_F32_F32_F32 : HSAILProfile<[U8X4Op, F32Op, F32Op, F32Op, F32Op]>; +def Inst_F32_U8X4_U32 : HSAILProfile<[F32Op, U8X4Op, U32Op, UntypedOp, UntypedOp]>; + +def Inst_U32_U16X2_U16X2_U32 : HSAILProfile<[U32Op, U16X2Op, U16X2Op, U32Op, UntypedOp]>; +def Inst_U32_U8X4_U8X4_U32 : HSAILProfile<[U32Op, U8X4Op, U8X4Op, U32Op, UntypedOp]>; +def Inst_U16X2_U8X4_U8X4_U16X2 : HSAILProfile<[U16X2Op, U8X4Op, U8X4Op, U16X2Op, UntypedOp]>; + +def Inst_B1_F32_U32 : HSAILProfile<[B1Op, F32Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_B1_F64_U32 : HSAILProfile<[B1Op, F64Op, U32Op, UntypedOp, UntypedOp]>; + +def Inst_B1_B1_U32_B1_B1 : HSAILProfile<[B1Op, B1Op, U32Op, B1Op, B1Op]>; +def Inst_B32_B32_U32_B32_B1 : HSAILProfile<[B32Op, B32Op, U32Op, B32Op, B1Op]>; +def Inst_B64_B64_U32_B64_B1 : HSAILProfile<[B64Op, B64Op, U32Op, B64Op, B1Op]>; + + +// Compare profiles. +def Inst_B1_S32_S32 : HSAILProfile<[B1Op, S32Op, S32Op, UntypedOp, UntypedOp]>; +def Inst_B1_S64_S64 : HSAILProfile<[B1Op, S64Op, S64Op, UntypedOp, UntypedOp]>; + +def Inst_B1_U32_U32 : HSAILProfile<[B1Op, U32Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_B1_U64_U64 : HSAILProfile<[B1Op, U64Op, U64Op, UntypedOp, UntypedOp]>; + +def Inst_B1_F32_F32 : HSAILProfile<[B1Op, F32Op, F32Op, UntypedOp, UntypedOp]>; +def Inst_B1_F64_F64 : HSAILProfile<[B1Op, F64Op, F64Op, UntypedOp, UntypedOp]>; + +// Atomic profiles. 
+def Inst_Void_B32 : HSAILProfile<[UntypedOp, B32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_Void_B64 : HSAILProfile<[UntypedOp, B64Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_Void_S32 : HSAILProfile<[UntypedOp, S32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_Void_S64 : HSAILProfile<[UntypedOp, S64Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_Void_U32 : HSAILProfile<[UntypedOp, U32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_Void_U64 : HSAILProfile<[UntypedOp, U64Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_B32 : HSAILProfile<[B32Op, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B64 : HSAILProfile<[B64Op, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; + + +//===----------------------------------------------------------------------===// +// Custom Selection DAG Type Profiles +//===----------------------------------------------------------------------===// +include "HSAILProfiles.td" + +//===----------------------------------------------------------------------===// +// Custom Selection DAG Nodes +//===----------------------------------------------------------------------===// +include "HSAILNodes.td" + +//===----------------------------------------------------------------------===// +// Custom Pattern DAG Nodes +//===----------------------------------------------------------------------===// +include "HSAILPatterns.td" + +//===----------------------------------------------------------------------===// +// Instruction format classes +//===----------------------------------------------------------------------===// +include "HSAILInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Intrinsics support +//===----------------------------------------------------------------------===// +include "HSAILIntrinsics.td" + +//===----------------------------------------------------------------------===// +// Instructions support +//===----------------------------------------------------------------------===// +include "HSAILInstructions.td" Index: lib/Target/HSAIL/HSAILInstructions.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILInstructions.td @@ -0,0 +1,56 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// All basic int and fp arithmetic instructions, shifts, bit manipulation, +// moves, cmoves and multimedia like unpack. +// If we have full support of multimedia we would need to extract latter into a +// separate file. 
+include "HSAILArithmetic.td" + +//////////////////////////////////////////////////////////////////////////////// +// All comparisons and testing, including class_f32|64 +include "HSAILComparisons.td" + +//////////////////////////////////////////////////////////////////////////////// +// All atomic operations +include "HSAILAtomics.td" + +//////////////////////////////////////////////////////////////////////////////// +// Special HSAIL operations like NDRange queries, barriers, syncs etc +include "HSAILSpecial.td" + +//////////////////////////////////////////////////////////////////////////////// +// All control transfer instructions including call, ret and branches +include "HSAILControlFlow.td" + +//////////////////////////////////////////////////////////////////////////////// +// All conversions including bitcasts resulting in plain moves +include "HSAILConversions.td" + +//////////////////////////////////////////////////////////////////////////////// +// All loads and stores including kernarg and arg operations, argscopes and +// params. Image and sampler parameter manipulation operations are also here. +// stof and ftos operations are here as well. +include "HSAILLoadStore.td" + +//////////////////////////////////////////////////////////////////////////////// +// All image operations except image parameters handling +include "HSAILImages.td" + +//////////////////////////////////////////////////////////////////////////////// +// Fused opcodes folding a complex dag into a single instruction or a short +// instruction sequence, like fma or rsqrt. +// This file is for optimization purposes. Its inclusion is not required for +// valid code generation. For example, rsqrt is defined twice - first time in +// the HSAILArithmetic.td to be used with intrinsic expansion and second time +// here, but with a pattern folding div and sqrt into a single instruction. +// For that reason HSAILFusion.td is always last, so no other pattern would +// accidentally refer any of its opcodes. +include "HSAILFusion.td" Index: lib/Target/HSAIL/HSAILIntrinsicInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILIntrinsicInfo.h @@ -0,0 +1,63 @@ +//===------------ HSAILIntrinsicInfo.h - HSAILIntrinsic Info ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the target intrinsic instructions to the code generator. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_INTRINSIC_INFO_H_ +#define _HSAIL_INTRINSIC_INFO_H_ + +#include "llvm/IR/Intrinsics.h" +#include "llvm/Target/TargetIntrinsicInfo.h" + +namespace llvm { + +class Function; +class Module; +class Type; + +class HSAILTargetMachine; + +namespace HSAILIntrinsic { +enum ID { + last_non_HSAIL_intrinsic = Intrinsic::num_intrinsics - 1, +#define GET_INTRINSIC_ENUM_VALUES +#include "HSAILGenIntrinsics.inc" +#undef GET_INTRINSIC_ENUM_VALUES + , + num_HSAIL_intrinsics +}; +} + +//--------------------------------------------------------------------------- +/// +/// HSAILIntrinsicInfo - Interface to description of machine intrinsic set +/// +class HSAILIntrinsicInfo : public TargetIntrinsicInfo { +public: + HSAILIntrinsicInfo(HSAILTargetMachine *tm); + + std::string getName(unsigned IID, Type **Tys = nullptr, + unsigned numTys = 0) const override; + + unsigned lookupName(const char *Name, unsigned Len) const override; + + bool isOverloaded(unsigned IID) const override; + + Function *getDeclaration(Module *M, unsigned ID, Type **Tys = nullptr, + unsigned numTys = 0) const override; + + static bool isReadImage(llvm::HSAILIntrinsic::ID intr); + static bool isLoadImage(llvm::HSAILIntrinsic::ID intr); +}; + +} // End llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILIntrinsicInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILIntrinsicInfo.cpp @@ -0,0 +1,143 @@ +//===-- HSAILIntrinsicInfo.cpp - HSAIL Intrinsic Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file contains the HSAIL Implementation of the IntrinsicInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "HSAILIntrinsicInfo.h" +#include "HSAILTargetMachine.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Intrinsics.h" +using namespace llvm; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "HSAILGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN + +bool HSAILIntrinsicInfo::isReadImage(HSAILIntrinsic::ID intr) { + switch (intr) { + default: + return false; + + case HSAILIntrinsic::HSAIL_rd_imgf_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2da_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_2da_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_3d_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_3d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_2da_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_2da_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_3d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_3d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_2da_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_2da_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_3d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_3d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_s32: + return true; + } +} + +bool HSAILIntrinsicInfo::isLoadImage(HSAILIntrinsic::ID intr) { + switch (intr) { + default: + return false; + + case HSAILIntrinsic::HSAIL_ld_imgf_1d_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_1da_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_1db_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_2d_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_2da_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_3d_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_1d_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_1da_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_1db_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_2d_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_2da_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_3d_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_1d_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_1da_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_1db_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_2d_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_2da_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_3d_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_2ddepth_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_2dadepth_u32: + return true; + } +} + +HSAILIntrinsicInfo::HSAILIntrinsicInfo(HSAILTargetMachine *tm) + : TargetIntrinsicInfo() {} + +std::string HSAILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, + unsigned int numTys) const { + static const char *const names[] = { +#define GET_INTRINSIC_NAME_TABLE +#include "HSAILGenIntrinsics.inc" +#undef GET_INTRINSIC_NAME_TABLE + }; + + if (IntrID < Intrinsic::num_intrinsics) { 
+ return 0; + } + assert(IntrID < HSAILIntrinsic::num_HSAIL_intrinsics && + "Invalid intrinsic ID"); + + std::string Result(names[IntrID - Intrinsic::num_intrinsics]); + return Result; +} + +unsigned HSAILIntrinsicInfo::lookupName(const char *Name, + unsigned Len) const { +#define GET_FUNCTION_RECOGNIZER +#include "HSAILGenIntrinsics.inc" +#undef GET_FUNCTION_RECOGNIZER + return getIntrinsicForGCCBuiltin("HSAIL", Name); +} + +bool HSAILIntrinsicInfo::isOverloaded(unsigned IntrID) const { + if (!IntrID) + return false; + + unsigned id = IntrID - Intrinsic::num_intrinsics + 1; +#define GET_INTRINSIC_OVERLOAD_TABLE +#include "HSAILGenIntrinsics.inc" +#undef GET_INTRINSIC_OVERLOAD_TABLE +} + +Function *HSAILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, + Type **Tys, + unsigned int numTys) const { + llvm_unreachable("Not implemented"); + + return nullptr; +} Index: lib/Target/HSAIL/HSAILIntrinsics.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILIntrinsics.td @@ -0,0 +1,1325 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the hsail-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "HSAIL", isTarget = 1 in { + +// HSAIL intrinsics +def int_HSAIL_workitemid_flat : GCCBuiltin<"__hsail_workitemid_flat">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_workitemid_flatabs : GCCBuiltin<"__hsail_workitemid_flatabs">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_lane_id : GCCBuiltin<"__hsail_get_lane_id">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_dynwave_id : GCCBuiltin<"__hsail_get_dynwave_id">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_maxdynwave_id : GCCBuiltin<"__hsail_get_maxdynwave_id">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_clock : GCCBuiltin<"__hsail_get_clock">, + Intrinsic<[llvm_i64_ty], [], [] +>; + +def int_HSAIL_get_cu : GCCBuiltin<"__hsail_get_cu">, + Intrinsic<[llvm_i32_ty], [], [] +>; + +// HSAIL Cross-Lane Intrinsics +// For OCL 2.0 1/32/64-bit built-ins +def int_HSAIL_activelaneid_u32 : GCCBuiltin<"__hsail_activelaneid_u32">, + Intrinsic<[llvm_i32_ty], [], [] +>; + +def int_HSAIL_activelaneid_width_u32 : + GCCBuiltin<"__hsail_activelaneid_wavewidth_u32">, + Intrinsic<[llvm_i32_ty], [], [] +>; + +def int_HSAIL_activelanecount_u32_b1 : + GCCBuiltin<"__hsail_activelanecount_u32_b1">, + Intrinsic<[llvm_i32_ty], [llvm_i1_ty], [] +>; + +def int_HSAIL_activelanecount_width_u32_b1 : + GCCBuiltin<"__hsail_activelanecount_wavewidth_u32_b1">, + Intrinsic<[llvm_i32_ty], [llvm_i1_ty], [] +>; + +def int_HSAIL_activelanepermute_b32 : + GCCBuiltin<"__hsail_activelanepermute_b32">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [] +>; + +def int_HSAIL_activelanepermute_width_b32 : + GCCBuiltin<"__hsail_activelanepermute_wavewidth_b32">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [] +>; + +def int_HSAIL_activelanemask_v4_b64_b1 : + GCCBuiltin<"__hsail_activelanemask_v4_b64_b1">, + Intrinsic<[llvm_i64_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], + 
[llvm_i1_ty], [] +>; + +def int_HSAIL_activelanemask_v4_width_b64_b1 : + GCCBuiltin<"__hsail_activelanemask_v4_wavewidth_b64_b1">, + Intrinsic<[llvm_i64_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_i1_ty], [] +>; + + +// For OCL 2.0 64-bit built-ins + +def int_HSAIL_activelanepermute_b64 : + GCCBuiltin< "__hsail_activelanepermute_b64">, + Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i1_ty], [] +>; + +def int_HSAIL_activelanepermute_width_b64 : + GCCBuiltin<"__hsail_activelanepermute_wavewidth_b64">, + Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i1_ty], [] +>; + +// Intrinsics for OpenCL workitem built-ins (OCL 1.2 6.12.2) +def int_HSAIL_get_work_dim : GCCBuiltin<"__hsail_get_work_dim">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_global_id : GCCBuiltin<"__hsail_get_global_id">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_get_group_id : GCCBuiltin<"__hsail_get_group_id">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_get_local_id : GCCBuiltin<"__hsail_get_local_id">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_workgroup_size : GCCBuiltin<"__hsail_workgroup_size">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_currentworkgroup_size : + GCCBuiltin<"__hsail_currentworkgroup_size">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_get_global_size : GCCBuiltin<"__hsail_get_global_size">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_get_num_groups : GCCBuiltin<"__hsail_get_num_groups">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +// Intrinsics for OpenCL math built-ins (OCL 1.2 6.12.2) +def int_HSAIL_copysign_f32 : GCCBuiltin<"__hsail_copysign_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_copysign_f64 : GCCBuiltin<"__hsail_copysign_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_ncos_f32 : GCCBuiltin<"__hsail_ncos_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nexp2_f32 : GCCBuiltin<"__hsail_nexp2_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_abs_f32 : GCCBuiltin<"__hsail_abs_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_abs_f64 : GCCBuiltin<"__hsail_abs_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_fma_f32 : GCCBuiltin<"__hsail_fma_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; +def int_HSAIL_fma_f64 : GCCBuiltin<"__hsail_fma_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], [IntrNoMem]>; + +def int_HSAIL_nfma_f32 : GCCBuiltin<"__hsail_nfma_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nfma_f64 : GCCBuiltin<"__hsail_nfma_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_nlog2_f32 : GCCBuiltin<"__hsail_nlog2_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nsin_f32 : GCCBuiltin<"__hsail_nsin_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nsqrt_f32 : GCCBuiltin<"__hsail_nsqrt_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def 
int_HSAIL_nsqrt_f64 : GCCBuiltin<"__hsail_nsqrt_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_sqrt_ftz_f32 : GCCBuiltin<"__hsail_sqrt_ftz_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_sqrt_f64 : GCCBuiltin<"__hsail_sqrt_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_nrsqrt_f32 : GCCBuiltin<"__hsail_nrsqrt_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nrsqrt_f64 : GCCBuiltin<"__hsail_nrsqrt_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_nrcp_f32 : GCCBuiltin<"__hsail_nrcp_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nrcp_f64 : GCCBuiltin<"__hsail_nrcp_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_ceil_f32 : GCCBuiltin<"__hsail_ceil_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_ceil_f64 : GCCBuiltin<"__hsail_ceil_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_floor_f32 : GCCBuiltin<"__hsail_floor_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_floor_f64 : GCCBuiltin<"__hsail_floor_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_rnd_f32 : GCCBuiltin<"__hsail_round_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_rnd_f64 : GCCBuiltin<"__hsail_round_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_trunc_f32 : GCCBuiltin<"__hsail_trunc_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_trunc_f64 : GCCBuiltin<"__hsail_trunc_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +// Intrinsics for OpenCL integer built-ins (OCL 1.2 6.12.3) +def int_HSAIL_max_u32 : GCCBuiltin<"__hsail_max_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_max_s32 : GCCBuiltin<"__hsail_max_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_max_u64 : GCCBuiltin<"__hsail_max_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_max_s64 : GCCBuiltin<"__hsail_max_s64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_max_f32 : GCCBuiltin<"__hsail_max_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_max_f64 : GCCBuiltin<"__hsail_max_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_max_f32 : GCCBuiltin<"__gcn_max_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_max_f64 : GCCBuiltin<"__gcn_max_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +// FIXME: min / max intrinsics should be removed. 
+def int_HSAIL_min_u32 : GCCBuiltin<"__hsail_min_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_min_s32 : GCCBuiltin<"__hsail_min_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_min_u64 : GCCBuiltin<"__hsail_min_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_min_s64 : GCCBuiltin<"__hsail_min_s64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_min_f32 : GCCBuiltin<"__hsail_min_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_min_f64 : GCCBuiltin<"__hsail_min_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_min_f32 : GCCBuiltin<"__gcn_min_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_min_f64 : GCCBuiltin<"__gcn_min_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_div_f32 : GCCBuiltin<"__hsail_div_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +// Intrinsics for OpenCL relational built-ins (OCL 1.2 6.12.6) +def int_HSAIL_class_f32 : GCCBuiltin<"__hsail_class_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_class_f64 : GCCBuiltin<"__hsail_class_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty, llvm_i32_ty], [IntrNoMem] +>; + +// Intrinsics for OpenCL synchronization built-ins (OCL 1.2 6.12.8) +def int_HSAIL_barrier : GCCBuiltin<"__hsail_barrier">, + Intrinsic<[], [], [] +>; + +def int_HSAIL_wavebarrier : GCCBuiltin<"__hsail_wavebarrier">, + Intrinsic<[], [], [] +>; + +def int_HSAIL_memfence : GCCBuiltin<"__hsail_memfence">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [] +>; + +def int_HSAIL_imagefence : GCCBuiltin<"__hsail_imagefence">, + Intrinsic<[], [], [] +>; + +// Instrinsics for explicit conversions +// float to int +def int_HSAIL_cvt_s32_neari_f32 : GCCBuiltin<"__cvt_s32_rte_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_downi_f32 : GCCBuiltin<"__cvt_s32_rtn_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_upi_f32 : GCCBuiltin<"__cvt_s32_rtp_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_zeroi_f32 : GCCBuiltin<"__cvt_s32_rtz_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + + +// float to unsigned int +def int_HSAIL_cvt_u32_neari_f32 : GCCBuiltin<"__cvt_u32_rte_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_downi_f32 : GCCBuiltin<"__cvt_u32_rtn_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_upi_f32 : GCCBuiltin<"__cvt_u32_rtp_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_zeroi_f32 : GCCBuiltin<"__cvt_u32_rtz_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + + +// float to long +def int_HSAIL_cvt_s64_neari_f32 : GCCBuiltin<"__cvt_s64_rte_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_downi_f32 : GCCBuiltin<"__cvt_s64_rtn_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_upi_f32 : GCCBuiltin<"__cvt_s64_rtp_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_zeroi_f32 : 
GCCBuiltin<"__cvt_s64_rtz_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + + +// float to unsigned long +def int_HSAIL_cvt_u64_neari_f32 : GCCBuiltin<"__cvt_u64_rte_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_downi_f32 : GCCBuiltin<"__cvt_u64_rtn_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_upi_f32 : GCCBuiltin<"__cvt_u64_rtp_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_zeroi_f32 : GCCBuiltin<"__cvt_u64_rtz_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + + +// double to int +def int_HSAIL_cvt_s32_neari_f64 : GCCBuiltin<"__cvt_s32_rte_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_downi_f64 : GCCBuiltin<"__cvt_s32_rtn_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_upi_f64 : GCCBuiltin<"__cvt_s32_rtp_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_zeroi_f64 : GCCBuiltin<"__cvt_s32_rtz_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// double to unsigned int +def int_HSAIL_cvt_u32_neari_f64 : GCCBuiltin<"__cvt_u32_rte_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_downi_f64 : GCCBuiltin<"__cvt_u32_rtn_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_upi_f64 : GCCBuiltin<"__cvt_u32_rtp_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_zeroi_f64 : GCCBuiltin<"__cvt_u32_rtz_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// double to long +def int_HSAIL_cvt_s64_neari_f64 : GCCBuiltin<"__cvt_s64_rte_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_downi_f64 : GCCBuiltin<"__cvt_s64_rtn_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_upi_f64 : GCCBuiltin<"__cvt_s64_rtp_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_zeroi_f64 : GCCBuiltin<"__cvt_s64_rtz_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// double to unsigned long +def int_HSAIL_cvt_u64_neari_f64 : GCCBuiltin<"__cvt_u64_rte_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_downi_f64 : GCCBuiltin<"__cvt_u64_rtn_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_upi_f64 : GCCBuiltin<"__cvt_u64_rtp_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_zeroi_f64 : GCCBuiltin<"__cvt_u64_rtz_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// int to float +def int_HSAIL_cvt_f32_down_i32 : GCCBuiltin<"__cvt_f32_rtn_i32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_i32 : GCCBuiltin<"__cvt_f32_rtp_i32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_i32 : GCCBuiltin<"__cvt_f32_rtz_i32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + + +// unsigned int to float +def int_HSAIL_cvt_f32_down_u32 : GCCBuiltin<"__cvt_f32_rtn_u32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_u32 : GCCBuiltin<"__cvt_f32_rtp_u32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_u32 : 
GCCBuiltin<"__cvt_f32_rtz_u32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + + +// long to float +def int_HSAIL_cvt_f32_down_i64 : GCCBuiltin<"__cvt_f32_rtn_i64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_i64 : GCCBuiltin<"__cvt_f32_rtp_i64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_i64 : GCCBuiltin<"__cvt_f32_rtz_i64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + + +// unsigned long to float +def int_HSAIL_cvt_f32_down_u64 : GCCBuiltin<"__cvt_f32_rtn_u64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_u64 : GCCBuiltin<"__cvt_f32_rtp_u64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_u64 : GCCBuiltin<"__cvt_f32_rtz_u64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + + +// long to double +def int_HSAIL_cvt_f64_down_i64 : GCCBuiltin<"__cvt_f64_rtn_i64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f64_up_i64 : GCCBuiltin<"__cvt_f64_rtp_i64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f64_zero_i64 : GCCBuiltin<"__cvt_f64_rtz_i64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + + +// unsigned long to double +def int_HSAIL_cvt_f64_down_u64 : GCCBuiltin<"__cvt_f64_rtn_u64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f64_up_u64 : GCCBuiltin<"__cvt_f64_rtp_u64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f64_zero_u64 : GCCBuiltin<"__cvt_f64_rtz_u64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + + +// double to float +def int_HSAIL_cvt_f32_down_f64 : GCCBuiltin<"__cvt_f32_rtn_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_f64 : GCCBuiltin<"__cvt_f32_rtp_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_f64 : GCCBuiltin<"__cvt_f32_rtz_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// half to float +def int_HSAIL_cvt_f32_f16 : GCCBuiltin<"__cvt_f32_f16">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + + +// float to half +def int_HSAIL_cvt_zero_f16_f32 : GCCBuiltin<"__cvt_f16_rtz_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_near_f16_f32 : GCCBuiltin<"__cvt_f16_rte_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_down_f16_f32 : GCCBuiltin<"__cvt_f16_rtn_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_up_f16_f32 : GCCBuiltin<"__cvt_f16_rtp_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +// double to half +def int_HSAIL_cvt_zero_f16_f64 : GCCBuiltin<"__cvt_f16_rtz_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_near_f16_f64 : GCCBuiltin<"__cvt_f16_rte_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_down_f16_f64 : GCCBuiltin<"__cvt_f16_rtn_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_up_f16_f64 : GCCBuiltin<"__cvt_f16_rtp_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +// Misc intrinsics used by OpenCL built-ins +def int_HSAIL_bitselect_u32 : GCCBuiltin<"__hsail_bitselect_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, 
llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_bitselect_u64 : GCCBuiltin<"__hsail_bitselect_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +// Media-ops intrinsics +def int_HSAIL_bitalign_b32 : GCCBuiltin<"__hsail_bitalign_b32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_bytealign_b32 : GCCBuiltin<"__hsail_bytealign_b32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_packcvt_u8x4_f32 : GCCBuiltin<"__hsail_packcvt_u8x4_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_lerp_u8x4 : GCCBuiltin<"__hsail_lerp_u8x4">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_sad_u32_u8x4 : GCCBuiltin<"__hsail_sad_u32_u8x4">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_sadhi_u16x2_u8x4 : GCCBuiltin<"__hsail_sadhi_u16x2_u8x4">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_unpackcvt_f32_u8x4 : GCCBuiltin<"__hsail_unpackcvt_f32_u8x4">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +// Media Ops2 + +def int_HSAIL_msad: GCCBuiltin<"__hsail_msad">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_sadw: GCCBuiltin<"__hsail_sadw">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_sadd: GCCBuiltin<"__hsail_sadd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_umin3: GCCBuiltin<"__hsail_umin3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_fmin3: GCCBuiltin<"__hsail_f32_min3">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_imin3: GCCBuiltin<"__hsail_imin3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_umax3: GCCBuiltin<"__hsail_umax3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_fmax3: GCCBuiltin<"__hsail_f32_max3">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_imax3: GCCBuiltin<"__hsail_imax3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_umed3: GCCBuiltin<"__hsail_umedian3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_fmed3: GCCBuiltin<"__hsail_f32_median3">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_imed3: GCCBuiltin<"__hsail_imedian3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_qsad: GCCBuiltin<"__hsail_qsad">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_mqsad: GCCBuiltin<"__hsail_mqsad">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_bfe: GCCBuiltin<"__hsail_bfe">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_ibfe: GCCBuiltin<"__hsail_ibfe">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def 
int_HSAIL_bfm: GCCBuiltin<"__hsail_bfm">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_ftz_f32 : GCCBuiltin<"__hsail_ftz_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_mul_ftz_f32 : GCCBuiltin<"__hsail_mul_ftz_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_firstbit_u32 : GCCBuiltin<"__hsail_firstbit_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_lastbit_u32 : GCCBuiltin<"__hsail_lastbit_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_fract_f32 : GCCBuiltin<"__hsail_fraction_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_fract_f64 : GCCBuiltin<"__hsail_fraction_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_mad_u32 : GCCBuiltin<"__hsail_mad_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mad_u64 : GCCBuiltin<"__hsail_mad_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_mulhi_s32 : GCCBuiltin<"__hsail_mulhi_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mulhi_u32 : GCCBuiltin<"__hsail_mulhi_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mulhi_s64 : GCCBuiltin<"__hsail_mulhi_s64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_mulhi_u64 : GCCBuiltin<"__hsail_mulhi_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_mad24_s32 : GCCBuiltin<"__hsail_mad24_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mad24_u32 : GCCBuiltin<"__hsail_mad24_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mul24_s32 : GCCBuiltin<"__hsail_mul24_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mul24_u32 : GCCBuiltin<"__hsail_mul24_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_popcount_u32_b32 : GCCBuiltin<"__hsail_popcount_u32_b32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_fldexp_f32 : GCCBuiltin<"__gcn_fldexp_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_fldexp_f64 : GCCBuiltin<"__gcn_fldexp_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_i32_ty], [IntrNoMem] +>; + +// atomic counter32 +// TODO: IntrReadWriteArgMem? 
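+// Note (editorial, hedged): append/consume update an atomic counter through
+// their pointer operand, so they are declared with no properties here and are
+// treated as fully side-effecting. If they only ever touch memory reachable
+// from that argument, IntrReadWriteArgMem would be the more precise property,
+// which appears to be what the TODO above is asking.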
+def int_HSAIL_gcn_atomic_append_u32 : GCCBuiltin<"__gcn_atomic_append_u32">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [] +>; + +def int_HSAIL_gcn_atomic_consume_u32 : GCCBuiltin<"__gcn_atomic_consume_u32">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [] +>; + +// Image intrinsics + +// Image read instrinsics +let TargetPrefix = "HSAIL", isTarget = 1 in { + // Read image intrinsic classes + class ReadImage1DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType], + [IntrReadArgMem, NoCapture<4>, NoCapture<5>] + >; + + class ReadImage2DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType, CoordType], + [IntrReadArgMem, NoCapture<4>, NoCapture<5>] + >; + + class ReadImage3DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType, CoordType, CoordType], + [IntrReadArgMem, NoCapture<4>, NoCapture<5>] + >; + + // OpenCL 2.0 image 2D Depth + class ReadImage2DDepthIntr : + GCCBuiltin, + Intrinsic<[DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType, CoordType], + [IntrReadArgMem, NoCapture<1>, NoCapture<2>] + >; + + // OpenCL 2.0 image 2D Array Depth + class ReadImage2DArrayDepthIntr : + GCCBuiltin, + Intrinsic<[DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType, CoordType, CoordType], + [IntrReadArgMem, NoCapture<1>, NoCapture<2>] + >; + + // Load Image intrinsic classes + class LoadImage1DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, CoordType], + [IntrReadArgMem, NoCapture<4>] + >; + + class LoadImage2DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, CoordType, CoordType], + [IntrReadArgMem, NoCapture<4>] + >; + + class LoadImage3DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, CoordType, CoordType, CoordType], + [IntrReadArgMem, NoCapture<4>] + >; + + class LoadImage2DDepthIntr : + GCCBuiltin, + Intrinsic<[DestType], + [llvm_ptr_ty, CoordType, CoordType], + [IntrReadArgMem, NoCapture<1>] + >; + + class LoadImage2DArrayDepthIntr : + GCCBuiltin, + Intrinsic<[DestType], + [llvm_ptr_ty, CoordType, CoordType, CoordType], + [IntrReadArgMem, NoCapture<1>] + >; + + // Store image intrinsic classes + class StoreImage1dInt : + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty, llvm_i32_ty], [] + >; + + class StoreImage1dFloat : + Intrinsic<[], [llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_ptr_ty, llvm_i32_ty], [] + >; + + class StoreImage2dInt : + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [] + >; + + class StoreImage2dFloat : + Intrinsic<[], [llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_ptr_ty, llvm_i32_ty, + llvm_i32_ty], [] + >; + + class StoreImage3dInt : + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [] + >; + + class StoreImage3dFloat : + Intrinsic<[], [llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_ptr_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [] + >; + + class StoreImage2dDepthFloat : + Intrinsic<[], [llvm_float_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [] + >; + + class StoreImage2dArrayDepthFloat : + Intrinsic<[], [llvm_float_ty, llvm_ptr_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [] + >; +} + +// read image 1d 
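+// Naming convention for the image intrinsics that follow (as far as the
+// definitions here show): __hsail_rdimage{f,i,ui}_<geom>_{s32,f32} samples
+// through two handles (presumably image and sampler) with s32 or f32
+// coordinates, while __hsail_ldimage*_u32 loads directly from the image with
+// integer coordinates. Ordinary reads/loads return all four channels; the
+// *depth variants return a single value.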
+def int_HSAIL_rd_imgf_1d_s32 : + ReadImage1DIntr<"__hsail_rdimagef_1d_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_1d_f32 : + ReadImage1DIntr<"__hsail_rdimagef_1d_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_1d_s32 : + ReadImage1DIntr<"__hsail_rdimagei_1d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_1d_f32 : + ReadImage1DIntr<"__hsail_rdimagei_1d_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_1d_s32 : + ReadImage1DIntr<"__hsail_rdimageui_1d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_1d_f32 : + ReadImage1DIntr<"__hsail_rdimageui_1d_f32", llvm_i32_ty, llvm_float_ty>; + +// read image 1d array +def int_HSAIL_rd_imgf_1da_s32 : + ReadImage2DIntr<"__hsail_rdimagef_1da_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_1da_f32 : + ReadImage2DIntr<"__hsail_rdimagef_1da_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_1da_s32 : + ReadImage2DIntr<"__hsail_rdimagei_1da_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_1da_f32 : + ReadImage2DIntr<"__hsail_rdimagei_1da_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_1da_s32 : + ReadImage2DIntr<"__hsail_rdimageui_1da_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_1da_f32 : + ReadImage2DIntr<"__hsail_rdimageui_1da_f32", llvm_i32_ty, llvm_float_ty>; + +// read image 2d +def int_HSAIL_rd_imgf_2d_s32 : + ReadImage2DIntr<"__hsail_rdimagef_2d_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_2d_f32 : + ReadImage2DIntr<"__hsail_rdimagef_2d_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_2d_s32 : + ReadImage2DIntr<"__hsail_rdimagei_2d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_2d_f32 : + ReadImage2DIntr<"__hsail_rdimagei_2d_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_2d_s32 : + ReadImage2DIntr<"__hsail_rdimageui_2d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_2d_f32 : + ReadImage2DIntr<"__hsail_rdimageui_2d_f32", llvm_i32_ty, llvm_float_ty>; + +// read image 2d array +def int_HSAIL_rd_imgf_2da_s32 : + ReadImage3DIntr<"__hsail_rdimagef_2da_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_2da_f32 : + ReadImage3DIntr<"__hsail_rdimagef_2da_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_2da_s32 : + ReadImage3DIntr<"__hsail_rdimagei_2da_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_2da_f32 : + ReadImage3DIntr<"__hsail_rdimagei_2da_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_2da_s32 : + ReadImage3DIntr<"__hsail_rdimageui_2da_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_2da_f32 : + ReadImage3DIntr<"__hsail_rdimageui_2da_f32", llvm_i32_ty, llvm_float_ty>; + +// read image 3d +def int_HSAIL_rd_imgf_3d_s32 : + ReadImage3DIntr<"__hsail_rdimagef_3d_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_3d_f32 : + ReadImage3DIntr<"__hsail_rdimagef_3d_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_3d_s32 : + ReadImage3DIntr<"__hsail_rdimagei_3d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_3d_f32 : + ReadImage3DIntr<"__hsail_rdimagei_3d_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_3d_s32 : + ReadImage3DIntr<"__hsail_rdimageui_3d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_3d_f32 : + ReadImage3DIntr<"__hsail_rdimageui_3d_f32", llvm_i32_ty, llvm_float_ty>; + +// OpenCL 2.0 read image 2ddepth +def int_HSAIL_rd_imgf_2ddepth_s32 : + ReadImage2DDepthIntr<"__hsail_rdimagef_2ddepth_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_2ddepth_f32 : + ReadImage2DDepthIntr<"__hsail_rdimagef_2ddepth_f32", 
llvm_float_ty, llvm_float_ty>; + +// OpenCL 2.0 read image 2dadepth +def int_HSAIL_rd_imgf_2dadepth_s32 : + ReadImage2DArrayDepthIntr<"__hsail_rdimagef_2dadepth_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_2dadepth_f32 : + ReadImage2DArrayDepthIntr<"__hsail_rdimagef_2dadepth_f32", llvm_float_ty, llvm_float_ty>; + +// Load image intrinsics + +// load image 1d +def int_HSAIL_ld_imgf_1d_u32 : + LoadImage1DIntr<"__hsail_ldimagef_1d_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_1d_u32 : + LoadImage1DIntr<"__hsail_ldimagei_1d_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_1d_u32 : + LoadImage1DIntr<"__hsail_ldimageui_1d_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 1d buffer +def int_HSAIL_ld_imgf_1db_u32 : + LoadImage1DIntr<"__hsail_ldimagef_1db_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_1db_u32 : + LoadImage1DIntr<"__hsail_ldimagei_1db_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_1db_u32 : + LoadImage1DIntr<"__hsail_ldimageui_1db_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 1d array +def int_HSAIL_ld_imgf_1da_u32 : + LoadImage2DIntr<"__hsail_ldimagef_1da_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_1da_u32 : + LoadImage2DIntr<"__hsail_ldimagei_1da_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_1da_u32 : + LoadImage2DIntr<"__hsail_ldimageui_1da_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 2d +def int_HSAIL_ld_imgf_2d_u32 : + LoadImage2DIntr<"__hsail_ldimagef_2d_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_2d_u32 : + LoadImage2DIntr<"__hsail_ldimagei_2d_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_2d_u32 : + LoadImage2DIntr<"__hsail_ldimageui_2d_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 1d array +def int_HSAIL_ld_imgf_2da_u32 : + LoadImage3DIntr<"__hsail_ldimagef_2da_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_2da_u32 : + LoadImage3DIntr<"__hsail_ldimagei_2da_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_2da_u32 : + LoadImage3DIntr<"__hsail_ldimageui_2da_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 3d +def int_HSAIL_ld_imgf_3d_u32 : + LoadImage3DIntr<"__hsail_ldimagef_3d_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_3d_u32 : + LoadImage3DIntr<"__hsail_ldimagei_3d_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_3d_u32 : + LoadImage3DIntr<"__hsail_ldimageui_3d_u32", llvm_i32_ty, llvm_i32_ty>; + +// OpenCL 2.0 load image 2d depth +def int_HSAIL_ld_imgf_2ddepth_u32 : + LoadImage2DDepthIntr<"__hsail_ldimagef_2ddepth_u32", llvm_float_ty, llvm_i32_ty>; + +// OpenCL 2.0 load image 2dadepth +def int_HSAIL_ld_imgf_2dadepth_u32 : + LoadImage2DArrayDepthIntr<"__hsail_ldimagef_2dadepth_u32", llvm_float_ty, llvm_i32_ty>; + + +// Image store intrinsics +// store image 1d +def int_HSAIL_stimagef_1d_i32 : GCCBuiltin<"__hsail_stimagef_1d_i32">, + StoreImage1dFloat; +def int_HSAIL_stimagei_1d_i32 : GCCBuiltin<"__hsail_stimagei_1d_i32">, + StoreImage1dInt; +def int_HSAIL_stimageui_1d_i32 : GCCBuiltin<"__hsail_stimageui_1d_i32">, + StoreImage1dInt; + +// store image 1d array +def int_HSAIL_stimagef_1da_i32 : GCCBuiltin<"__hsail_stimagef_1da_i32">, + StoreImage2dFloat; +def int_HSAIL_stimagei_1da_i32 : GCCBuiltin<"__hsail_stimagei_1da_i32">, + StoreImage2dInt; +def int_HSAIL_stimageui_1da_i32 : GCCBuiltin<"__hsail_stimageui_1da_i32">, + StoreImage2dInt; + +// store image 1d buffer +def int_HSAIL_stimagef_1db_i32 : GCCBuiltin<"__hsail_stimagef_1db_i32">, + StoreImage1dFloat; +def int_HSAIL_stimagei_1db_i32 : 
GCCBuiltin<"__hsail_stimagei_1db_i32">, + StoreImage1dInt; +def int_HSAIL_stimageui_1db_i32 : GCCBuiltin<"__hsail_stimageui_1db_i32">, + StoreImage1dInt; + +// store image 2d +def int_HSAIL_stimagef_2d_i32 : GCCBuiltin<"__hsail_stimagef_2d_i32">, + StoreImage2dFloat; +def int_HSAIL_stimagei_2d_i32 : GCCBuiltin<"__hsail_stimagei_2d_i32">, + StoreImage2dInt; +def int_HSAIL_stimageui_2d_i32 : GCCBuiltin<"__hsail_stimageui_2d_i32">, + StoreImage2dInt; + +// store image 2d array +def int_HSAIL_stimagef_2da_i32 : GCCBuiltin<"__hsail_stimagef_2da_i32">, + StoreImage3dFloat; +def int_HSAIL_stimagei_2da_i32 : GCCBuiltin<"__hsail_stimagei_2da_i32">, + StoreImage3dInt; +def int_HSAIL_stimageui_2da_i32 : GCCBuiltin<"__hsail_stimageui_2da_i32">, + StoreImage3dInt; + +// store image 3d +def int_HSAIL_stimagef_3d_i32 : GCCBuiltin<"__hsail_stimagef_3d_i32">, + StoreImage3dFloat; +def int_HSAIL_stimagei_3d_i32 : GCCBuiltin<"__hsail_stimagei_3d_i32">, + StoreImage3dInt; +def int_HSAIL_stimageui_3d_i32 : GCCBuiltin<"__hsail_stimageui_3d_i32">, + StoreImage3dInt; + +// store image 2d depth +def int_HSAIL_stimagef_2ddepth_i32 : GCCBuiltin<"__hsail_stimagef_2ddepth_i32">, + StoreImage2dDepthFloat; + +// store image 2d array depth +def int_HSAIL_stimagef_2dadepth_i32 : GCCBuiltin<"__hsail_stimagef_2dadepth_i32">, + StoreImage2dArrayDepthFloat; + +// Image query +def int_HSAIL_query_width_1d : GCCBuiltin<"__hsail_query_width_1d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_1db : GCCBuiltin<"__hsail_query_width_1db">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_1da : GCCBuiltin<"__hsail_query_width_1da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_2d : GCCBuiltin<"__hsail_query_width_2d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_2da : GCCBuiltin<"__hsail_query_width_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_3d : GCCBuiltin<"__hsail_query_width_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_2ddepth : GCCBuiltin<"__hsail_query_width_2ddepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_2dadepth : GCCBuiltin<"__hsail_query_width_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_2d : GCCBuiltin<"__hsail_query_height_2d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_2da : GCCBuiltin<"__hsail_query_height_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_3d : GCCBuiltin<"__hsail_query_height_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_2ddepth : GCCBuiltin<"__hsail_query_height_2ddepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_2dadepth : GCCBuiltin<"__hsail_query_height_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_depth_3d : GCCBuiltin<"__hsail_depth_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_1d : GCCBuiltin<"__hsail_query_format_1d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_1db : GCCBuiltin<"__hsail_query_format_1db">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_1da : 
GCCBuiltin<"__hsail_query_format_1da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_2d : GCCBuiltin<"__hsail_query_format_2d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_2da : GCCBuiltin<"__hsail_query_format_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_3d : GCCBuiltin<"__hsail_query_format_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_1d : GCCBuiltin<"__hsail_query_order_1d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_1db : GCCBuiltin<"__hsail_query_order_1db">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_1da : GCCBuiltin<"__hsail_query_order_1da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_2d : GCCBuiltin<"__hsail_query_order_2d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_2da : GCCBuiltin<"__hsail_query_order_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_3d : GCCBuiltin<"__hsail_query_order_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_array_1da : GCCBuiltin<"__hsail_query_array_1da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_array_2da : GCCBuiltin<"__hsail_query_array_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_array_2dadepth : GCCBuiltin<"__hsail_query_array_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_channelorder_2ddepth : GCCBuiltin<"__hsail_query_channelorder_2ddepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_channelorder_2dadepth : GCCBuiltin<"__hsail_query_channelorder_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_channeltype_2ddepth : GCCBuiltin<"__hsail_query_channeltype_2ddepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_channeltype_2dadepth : GCCBuiltin<"__hsail_query_channeltype_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_segmentp_global : GCCBuiltin<"__hsail_segmentp_global">, + Intrinsic<[llvm_i1_ty], [llvm_anyptr_ty], [IntrNoMem] +>; + +def int_HSAIL_segmentp_local : GCCBuiltin<"__hsail_segmentp_local">, + Intrinsic<[llvm_i1_ty], [llvm_anyptr_ty], [IntrNoMem] +>; + +def int_HSAIL_segmentp_private : GCCBuiltin<"__hsail_segmentp_private">, + Intrinsic<[llvm_i1_ty], [llvm_anyptr_ty], [IntrNoMem] +>; + +def int_HSAIL_nullptr_flat : GCCBuiltin<"__hsail_nullptr_flat">, + Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem] +>; + +// FIXME: Should this be removed? It produces the same output as for flat. +def int_HSAIL_nullptr_global : GCCBuiltin<"__hsail_nullptr_global">, + Intrinsic<[llvm_anyptr_ty],[], [IntrNoMem] +>; + +def int_HSAIL_nullptr_group : GCCBuiltin<"__hsail_nullptr_group">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_nullptr_private : GCCBuiltin<"__hsail_nullptr_private">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +// FIXME: Should this be removed? It produces the same output as for flat. 
+def int_HSAIL_nullptr_readonly : GCCBuiltin<"__hsail_nullptr_readonly">, + Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem] +>; + +def int_HSAIL_nullptr_kernarg : GCCBuiltin<"__hsail_nullptr_kernarg">, + Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem] +>; + +// ld_kernarg instructions have no side effects and can be CSE'd or +// even deleted if dead. +def int_HSAIL_ld_kernarg_u32 : GCCBuiltin<"__hsail_ld_kernarg_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_ld_kernarg_u64 : GCCBuiltin<"__hsail_ld_kernarg_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty], [IntrNoMem] +>; + +} Index: lib/Target/HSAIL/HSAILKernel.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILKernel.h @@ -0,0 +1,131 @@ +//===-- HSAILKernel.h - HSAIL Kernel Class ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Definition of a HSAILKernel object and the various subclasses that +/// are used. +// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_KERNEL_H_ +#define _HSAIL_KERNEL_H_ + +#include "HSAIL.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/Constant.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { +class HSAILSubtarget; +class HSAILTargetMachine; +/// structure that holds information for a single local/region address array +typedef struct _HSAILArrayMemRec { + uint32_t vecSize; // size of each vector + uint32_t offset; // offset into the memory section + uint32_t align; // alignment + bool isHW; // flag to specify if HW is used or SW is used + bool isRegion; // flag to specify if GDS is used or not +} HSAILArrayMem; + +/// structure that holds information about a constant address +/// space pointer that is a kernel argument +typedef struct _HSAILConstPtrRec { + const Value *base; + uint32_t size; + uint32_t offset; + uint32_t align; // alignment + uint32_t cbNum; // value of 0 means that it does not use hw CB + bool isArray; // flag to specify that this is an array + bool isArgument; // flag to specify that this is for a kernel argument + bool usesHardware; // flag to specify if hardware CB is used or not + std::string name; +} HSAILConstPtr; + +/// Structure that holds information for all local/region address +/// arrays in the kernel +typedef struct _HSAILLocalPrivateArgRec { + // SmallVector local; + std::string name; // Kernel Name +} HSAILLocalPrivateArg; + +/// Structure that holds information for each kernel argument +typedef struct _HSAILkernelArgRec { + uint32_t reqGroupSize[3]; // x,y,z sizes for group. + uint32_t reqRegionSize[3]; // x,y,z sizes for region. + SmallVector + argInfo; // information about argumetns. + bool mHasRWG; // true if reqd_work_group_size is specified. + bool mHasRWR; // true if reqd_work_region_size is specified. + + _HSAILkernelArgRec() { + mHasRWG = false; + mHasRWR = false; + } +} HSAILKernelAttr; + +/// Holds information for each kernel. 
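+/// This aggregates what HSAILModuleInfo records per kernel: local, region and
+/// constant memory sizes, constant-pointer descriptors, the read-only /
+/// write-only / read-write image sets, argument type names, and the
+/// device-enqueue fields (EnqueuesKernel, KernelIndex).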
+struct HSAILKernel { + uint32_t curSize; // local memory, hardware + software emulated + uint32_t curRSize; // region memory, hardware + software emulated + uint32_t curHWSize; // hardware local memory + uint32_t curHWRSize; // hardware region memory + uint32_t constSize; // software constant memory + + bool mKernel; // true if this is a kernel + std::string mName; + HSAILKernelAttr *sgv; // kernel attributes + + // vector containing constant pointer information + SmallVector constPtr; + + uint32_t constSizes[HW_MAX_NUM_CB]; // Size of each constant buffer + + // set that specifies the read-only images for the kernel + SmallSet readOnly; + + // set that specifies the write-only images for the kernel + SmallSet writeOnly; + + // set that specifies the read-write images for the kernel + SmallSet readWrite; + + // set that specifies the access type qulifiers for the kernel arguments + std::vector accessTypeQualifer; + + // Vector of constant pool offsets + SmallVector, DEFAULT_VEC_SLOTS> + CPOffsets; + + // Vector of kernel argument type names + std::vector ArgTypeNames; + + // Fields required for device enqueue. + bool EnqueuesKernel; // true if enqueues a kernel. + uint32_t KernelIndex; // positive value which deonotes the kernel index + + HSAILKernel() { + curSize = 0; + curRSize = 0; + curHWSize = 0; + curHWRSize = 0; + constSize = 0; + + mKernel = false; + sgv = nullptr; + + memset(constSizes, 0, sizeof(constSizes)); + EnqueuesKernel = false; + KernelIndex = -1; + } +}; +} // end llvm namespace + +#endif // _HSAIL_KERNEL_H_ Index: lib/Target/HSAIL/HSAILLoadStore.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILLoadStore.td @@ -0,0 +1,112 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// stof, ftos -- not matched, inserted in MachineInstr lowering +// 32 bit + + + +defm STOF : InstSegCvt_1Op_PtrTypes<"stof", BrigOpcode.STOF>; +defm FTOS : InstSegCvt_1Op_PtrTypes<"ftos", BrigOpcode.FTOS>; +defm SEGMENTP : InstSegCvt_1Op_Segmentp_Types<"segmentp", BrigOpcode.SEGMENTP>; + +class SegmentPPat : Pat< + (HSAILsegmentp (i32 timm:$segment), + (i1 timm:$nonull), + (srcTy.VT (GPROrImm srcTy.VT:$src0))), + (!cast("SEGMENTP_B1"#srcTy.InstName) $segment, $nonull, $src0, BrigType.B1, srcTy.BT) +>; + + +let Predicates = [LargeModel] in { + def : SegmentPPat; +} + +let Predicates = [SmallModel] in { + def : SegmentPPat; +} + +let isNotDuplicable = 1, hasCtrlDep = 1, hasSideEffects = 1 in { + def ARG_DECL : HSAILInst<(outs), (ins PtrRC:$symbol, BrigType:$TypeLength, ArraySize:$size, + ArgDeclAlignment:$alignment), + "${alignment}arg$TypeLength $symbol$size", []>; +} + +// FIXME: If the MEMOP isn't explicitly typed in output, counts as +// wrong number of operands. 
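+// The LDPat/STPat helpers below match typed load/store nodes against the
+// generic LD/ST instructions; the segment, alignment and Brig type (plus
+// access width and memory-modifier mask for loads) arrive as explicit
+// immediate operands via the LoadAddr/StoreAddr address forms rather than
+// being encoded in the opcode.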
+class LDPat : Pat < + (vt (ldnode (LoadAddr MEMOP:$address, + BrigSegment:$segment, + BrigAlignment:$align, + BrigType:$TypeLength, + BrigWidth:$width, + BrigMemoryModifierMask:$mask))), + (inst MEMOP:$address, $TypeLength, $segment, $align, $width, $mask) +>; + + +defm LD : LD_Types<"ld", BrigOpcode.LD>; + + +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; + +let hasSideEffects = 1, hasCtrlDep = 1 in { + // It is not safe to move ld_arg as it can be in an argscope + defm RARG_LD : LD_Types<"ld", BrigOpcode.LD>; +} + +class STPat : Pat < + (node (vt (GPROrImm vt:$src)), + (StoreAddr MEMOP:$address, + BrigSegment:$segment, + BrigAlignment:$alignment, + BrigType:$TypeLength)), + (inst $src, MEMOP:$address, $TypeLength, $segment, $alignment) +>; + +defm ST : ST_Types<"st", BrigOpcode.ST>; + +// TODO: Promote float stores to integers. +def : STPat; +def : STPat; +def : STPat; +def : STPat; +def : STPat; +def : STPat; + + +// We need pseudos to implement condition register spilling due to a +// limitation storeRegToStackSlot currently has where it assumes only +// 1 instruction is created for spilling. +let isPseudo = 1 in { + def SPILL_B1 : ST<"spill_b1", BrigOpcode.NOP, + (ins CR:$src, MEMOP:$address, + BrigType:$TypeLength, BrigSegment:$segment, + BrigAlignment:$align) + >; + + def RESTORE_B1 : LD<"restore_b1", BrigOpcode.NOP, + (outs CR:$dest) + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// load memory address + +defm LDA : InstAddr_1Op_PtrTypes<"lda", BrigOpcode.LDA>; Index: lib/Target/HSAIL/HSAILMCInstLower.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMCInstLower.h @@ -0,0 +1,34 @@ +//===- HSAILMCInstLower.h MachineInstr Lowering Interface -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILMCINSTLOWER_H +#define LLVM_LIB_TARGET_HSAIL_HSAILMCINSTLOWER_H + +namespace llvm { + +class HSAILAsmPrinter; +class MachineInstr; +class MCContext; +class MCInst; + +class HSAILMCInstLower { + MCContext &Ctx; + const HSAILAsmPrinter &AP; + +public: + HSAILMCInstLower(MCContext &Ctx, const HSAILAsmPrinter &AP); + + /// \brief Lower a MachineInstr to an MCInst + void lower(const MachineInstr *MI, MCInst &OutMI) const; +}; + +} // End namespace llvm + +#endif Index: lib/Target/HSAIL/HSAILMCInstLower.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMCInstLower.cpp @@ -0,0 +1,91 @@ +//===- HSAILMCInstLower.cpp - Lower HSAIL MachineInstr to an MCInst ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Code to lower HSAIL MachineInstrs to their corresponding MCInst. 
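+/// Register, immediate, floating-point immediate, basic-block, global-address,
+/// external-symbol and MCSymbol operands are handled here; global names are
+/// mangled via HSAILAsmPrinter::getHSAILMangledName() and external symbols
+/// are prefixed with '%'.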
+// +//===----------------------------------------------------------------------===// +// + +#include "HSAILMCInstLower.h" +#include "HSAIL.h" +#include "HSAILAsmPrinter.h" +#include "InstPrinter/HSAILInstPrinter.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +HSAILMCInstLower::HSAILMCInstLower(MCContext &ctx, const HSAILAsmPrinter &ap) + : Ctx(ctx), AP(ap) {} + +void HSAILMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + + OutMI.setOpcode(MI->getOpcode()); + + for (const MachineOperand &MO : MI->explicit_operands()) { + MCOperand MCOp; + switch (MO.getType()) { + default: + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_FPImmediate: { + const APFloat &FloatValue = MO.getFPImm()->getValueAPF(); + + if (&FloatValue.getSemantics() == &APFloat::IEEEsingle) + MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat()); + else if (&FloatValue.getSemantics() == &APFloat::IEEEdouble) + MCOp = MCOperand::CreateFPImm(FloatValue.convertToDouble()); + else + llvm_unreachable("Unhandled floating point type"); + break; + } + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_Register: + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr( + MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + + SmallString<256> Name; + AP.getHSAILMangledName(Name, GV); + + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(Sym, Ctx)); + break; + } + case MachineOperand::MO_ExternalSymbol: { + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine('%') + MO.getSymbolName()); + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(Sym, Ctx)); + break; + } + case MachineOperand::MO_MCSymbol: { + MCSymbol *Sym = MO.getMCSymbol(); + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(Sym, Ctx)); + break; + } + case MachineOperand::MO_TargetIndex: { + llvm_unreachable("Don't know how to lower target index"); + break; + } + } + OutMI.addOperand(MCOp); + } +} Index: lib/Target/HSAIL/HSAILMachineFunctionInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMachineFunctionInfo.h @@ -0,0 +1,290 @@ +//==-- HSAILMachineFunctionInfo.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares HSAIL-specific per-machine-function information +// +//===----------------------------------------------------------------------===// + +#ifndef _HSAILMACHINEFUNCTIONINFO_H_ +#define _HSAILMACHINEFUNCTIONINFO_H_ + +#include "HSAIL.h" +#include "HSAILKernel.h" +#include "HSAILParamManager.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/Function.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include +#include +#include + +namespace llvm { +class HSAILSubtarget; +class HSAILPrintfInfo { + uint32_t mPrintfID; + SmallVector mOperands; + +public: + void addOperand(size_t idx, uint32_t size); + uint32_t getPrintfID(); + void setPrintfID(uint32_t idx); + size_t getNumOperands(); + uint32_t getOperandID(uint32_t idx); +}; // class HSAILPrintfInfo + +enum NameDecorationStyle { NONE, StdCall, FastCall }; +typedef struct SamplerInfoRec { + std::string name; // The name of the sampler + uint32_t val; // The value of the sampler + uint32_t idx; // The sampler resource id +} SamplerInfo; +// Some typedefs that will help with using the various iterators +// of the machine function info class. +typedef StringMap::iterator sampler_iterator; +typedef DenseSet::iterator func_iterator; +typedef DenseSet::iterator intr_iterator; +typedef DenseSet::iterator sema_iterator; +typedef DenseSet::iterator read_image1d_iterator; +typedef DenseSet::iterator write_image1d_iterator; +typedef DenseSet::iterator read_image1d_array_iterator; +typedef DenseSet::iterator write_image1d_array_iterator; +typedef DenseSet::iterator read_image1d_buffer_iterator; +typedef DenseSet::iterator write_image1d_buffer_iterator; +typedef DenseSet::iterator read_image2d_iterator; +typedef DenseSet::iterator write_image2d_iterator; +typedef DenseSet::iterator read_image2d_array_iterator; +typedef DenseSet::iterator write_image2d_array_iterator; +typedef DenseSet::iterator read_image3d_iterator; +typedef DenseSet::iterator write_image3d_iterator; +typedef DenseSet::iterator read_ptr_iterator; +typedef DenseSet::iterator error_iterator; +typedef std::map::iterator printf_iterator; +typedef std::set::iterator func_md_iterator; +typedef std::vector::iterator kernel_md_iterator; +// HSAILMachineFunctionInfo - This class is +// derived from MachineFunction private +// hsail target-specific information for each MachineFunction +class HSAILMachineFunctionInfo : public MachineFunctionInfo { + // The size in bytes required to host all of the kernel arguments. + // -1 means this value has not been determined yet. + int32_t mArgSize; + + // The size in bytes required to host the stack and the kernel arguments + // in private memory. + // -1 means this value has not been determined yet. + int32_t mScratchSize; + + // The size in bytes required to host the the kernel arguments + // on the stack. + // -1 means this value has not been determined yet. + int32_t mStackSize; + + // The size in bytes required to host private variables + // -1 means this value has not been determined yet. + int32_t mPrivateMemSize; + + // The size in bytes required to host group variables + // -1 means this value has not been determined yet. + int32_t mGroupMemSize; + + /// A map of constant to literal mapping for all of the 32bit or + /// smaller literals in the current function. 
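+  /// (Within this patch it is only consulted by the legacy addi32Literal()
+  /// helper, which is marked FIXME for removal.)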
+ std::map mIntLits; + + /// A map of name to sampler information that is used to emit + /// metadata to the IL stream that the runtimes can use for + /// hardware setup. + StringMap mSamplerMap; + + /// Set of all functions that this function calls. + DenseSet mFuncs; + + /// Set of all intrinsics that this function calls. + DenseSet mIntrs; + + /// Set of all the raw uavs. + DenseSet mRawUAV; + + /// Set of all semaphores + DenseSet mSemaphore; + + /// Set of all the read-only pointers + DenseSet mReadPtr; + + /// A set of all errors that occured in the backend for this function. + DenseSet mErrors; + + /// A set of all of the metadata that is used for the current function. + std::set mMetadataFunc; + + /// A set of all of the metadata that is used for the function wrapper. + std::vector mMetadataKernel; + + SmallVector mArgRegs; + + /// A number of 64 bit register slots reserved for $s registers. + unsigned RegisterPartitioning; + + /// Information about the kernel, NULL if the function is not a kernel. + HSAILKernel *mKernel; + + /// Pointer to the machine function that this information belongs to. + MachineFunction *mMF; + + /// Pointer to the subtarget for this function. + const HSAILSubtarget *mSTM; + + bool HasSpilledCRs; + bool HasScavengerSpill; + +public: + explicit HSAILMachineFunctionInfo(MachineFunction &MF); + + // FIXME: Remove these + void setUsesLocal() {} + void setUsesRegion() {} + + bool usesHWConstant(std::string name) const; + bool isKernel() const; + HSAILKernel *getKernel(); + + /// Get the size in bytes that are required to host all of + /// arguments and stack memory in scratch. + uint32_t getScratchSize(); + + /// Get the size in bytes that are required to host all of + /// private memory in scratch. + size_t getPrivateSize(); + + /// Get the size in bytes that are required to host all of + /// group memory. + size_t getGroupSize(); + + /// Get the size in bytes that is required to host all of + /// the arguments on the stack. + uint32_t getStackSize(); + + /// + /// @param val value to add the lookup table + /// @param Opcode opcode of the literal instruction + /// @brief adds the specified value of the type represented by the + /// Opcode + /// to the literal to integer and integer to literal mappings. + /// + /// Add a 32bit integer value to the literal table. + // uint32_t addi32Literal(uint32_t val, int Opcode = HSAIL::LOADCONST_i32); + uint32_t addi32Literal(uint32_t val, int Opcode = 0); + + // Iterators that point to the beginning and end of the sampler map. + sampler_iterator sampler_begin() { return mSamplerMap.begin(); } + sampler_iterator sampler_end() { return mSamplerMap.end(); } + + /// Add called functions to the set of all functions this function calls. + void addCalledFunc(uint32_t id) { mFuncs.insert(id); } + void eraseCalledFunc(uint32_t id) { mFuncs.erase(id); } + size_t func_size() { return mFuncs.size(); } + bool func_empty() { return mFuncs.empty(); } + func_iterator func_begin() { return mFuncs.begin(); } + func_iterator func_end() { return mFuncs.end(); } + + inline iterator_range funcs() { + return iterator_range(func_begin(), func_end()); + } + + /// Add a semaphore + void sema_insert(uint32_t id) { mSemaphore.insert(id); } + bool sema_count(uint32_t id) { return mSemaphore.count(id); } + size_t sema_size() { return mSemaphore.size(); } + sema_iterator sema_begin() { return mSemaphore.begin(); } + sema_iterator sema_end() { return mSemaphore.end(); } + + /// Add a raw uav id. 
+ void uav_insert(uint32_t id) { mRawUAV.insert(id); } + + /// Add a pointer to the known set of read-only pointers + void add_read_ptr(const Value *ptr) { mReadPtr.insert(ptr); } + bool read_ptr_count(const Value *ptr) { return mReadPtr.count(ptr); } + bool read_size() { return mReadPtr.size(); } + + // Add an error to the output for the current function. + typedef enum { + RELEASE_ONLY, /// Only emit error message in release mode. + DEBUG_ONLY, /// Only emit error message in debug mode. + ALWAYS /// Always emit the error message. + } ErrorMsgEnum; + + // FIXME: Remove these and use normal error reporting mechanism. + /// Add an error message to the set of all error messages. + void addErrorMsg(const char *msg, ErrorMsgEnum val = ALWAYS); + bool errors_empty() { return mErrors.empty(); } + error_iterator errors_begin() { return mErrors.begin(); } + error_iterator errors_end() { return mErrors.end(); } + + /// Add a string to the metadata set for a function/kernel wrapper + void addMetadata(const char *md, bool kernelOnly = false); + void addMetadata(std::string md, bool kernelOnly = false); + func_md_iterator func_md_begin() { return mMetadataFunc.begin(); } + func_md_iterator func_md_end() { return mMetadataFunc.end(); } + kernel_md_iterator kernel_md_begin() { return mMetadataKernel.begin(); } + kernel_md_iterator kernel_md_end() { return mMetadataKernel.end(); } + + /// Query to find out if we are a signed or unsigned integer type. + bool isSignedIntType(const Value *ptr); + + /// Query to find out if we are a volatile pointer. + bool isVolatilePointer(const Value *ptr); + + /// Query to find out if we are a restrict pointer. + bool isRestrictPointer(const Value *ptr); + + /// Query to find out if we are a constant argument. + bool isConstantArgument(const Value *ptr); + + /// add/retrieve the argument registers numbers + void addArgReg(unsigned arg) { mArgRegs.push_back(arg); } + unsigned getArgReg(unsigned arg) { + return (arg < mArgRegs.size()) ? mArgRegs[arg] : arg; + } + + void setRegisterPartitioning(unsigned RegSlots) { + RegisterPartitioning = RegSlots; + } + unsigned getRegisterPartitioning() const { return RegisterPartitioning; } + + HSAILParamManager &getParamManager() { return ParamManager; } + const HSAILParamManager &getParamManager() const { return ParamManager; } + + + bool hasSpilledCRs() const { + return HasSpilledCRs; + } + + void setHasSpilledCRs(bool Spill = true) { + HasSpilledCRs = Spill; + } + + bool hasScavengerSpill() const { + return HasScavengerSpill; + } + + void setHasScavengerSpill(bool Spill = true) { + HasScavengerSpill = Spill; + } + +private: + HSAILParamManager ParamManager; +}; +} // llvm namespace + +#endif // _HSAILMACHINEFUNCTIONINFO_H_ Index: lib/Target/HSAIL/HSAILMachineFunctionInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMachineFunctionInfo.cpp @@ -0,0 +1,386 @@ +//===-- HSAILMachineFunctionInfo.cpp --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "HSAILMachineFunctionInfo.h" +#include "HSAILModuleInfo.h" +#include "HSAILUtilityFunctions.h" +using namespace llvm; + +static const HSAILConstPtr *getConstPtr(const HSAILKernel *krnl, + const std::string &arg) { + if (!krnl) { + return nullptr; + } + + SmallVector::const_iterator begin, end; + for (begin = krnl->constPtr.begin(), end = krnl->constPtr.end(); begin != end; + ++begin) { + if (!strcmp(begin->name.data(), arg.c_str())) { + return &(*begin); + } + } + return nullptr; +} + +void HSAILPrintfInfo::addOperand(size_t idx, uint32_t size) { + mOperands.resize((unsigned)(idx + 1)); + mOperands[(unsigned)idx] = size; +} + +uint32_t HSAILPrintfInfo::getPrintfID() { return mPrintfID; } + +void HSAILPrintfInfo::setPrintfID(uint32_t id) { mPrintfID = id; } + +size_t HSAILPrintfInfo::getNumOperands() { return mOperands.size(); } + +uint32_t HSAILPrintfInfo::getOperandID(uint32_t idx) { return mOperands[idx]; } + +HSAILMachineFunctionInfo::HSAILMachineFunctionInfo(MachineFunction &MF) + : RegisterPartitioning(0), + HasSpilledCRs(false), + HasScavengerSpill(false), + ParamManager(MF.getTarget().getDataLayout()) { + const Function *F = MF.getFunction(); + mMF = &MF; + MachineModuleInfo &mmi = MF.getMMI(); + const HSAILTargetMachine *TM = + reinterpret_cast(&MF.getTarget()); + HSAILModuleInfo *AMI = &(mmi.getObjFileInfo()); + AMI->processModule(mmi.getModule(), TM); + for (Module::const_iterator I = F->getParent()->begin(), + E = F->getParent()->end(); + I != E; ++I) { + // Map all the known names to a unique number + AMI->getOrCreateFunctionID(I->getName()); + } + mSTM = TM->getSubtargetImpl(); + mKernel = AMI->getKernel(F->getName()); + + mScratchSize = -1; + mPrivateMemSize = -1; + mGroupMemSize = -1; + mArgSize = -1; + mStackSize = -1; +} + +bool HSAILMachineFunctionInfo::usesHWConstant(std::string name) const { + const HSAILConstPtr *curConst = getConstPtr(mKernel, name); + if (curConst) { + return curConst->usesHardware; + } else { + return false; + } +} + +bool HSAILMachineFunctionInfo::isKernel() const { + return mKernel != nullptr && mKernel->mKernel; +} + +HSAILKernel *HSAILMachineFunctionInfo::getKernel() { return mKernel; } + +uint32_t HSAILMachineFunctionInfo::getScratchSize() { + const DataLayout *DL = mMF->getTarget().getDataLayout(); + + if (mScratchSize == -1) { + mScratchSize = 0; + Function::const_arg_iterator I = mMF->getFunction()->arg_begin(); + Function::const_arg_iterator Ie = mMF->getFunction()->arg_end(); + while (I != Ie) { + // FIXME: Mishandling byval structs + Type *curType = I->getType(); + mScratchSize += RoundUpToAlignment(DL->getTypeStoreSize(curType), 16); + ++I; + } + // mScratchSize += ((mScratchSize + 15) & ~15); // possible typo: doubling + // mScratchSize + } + return (uint32_t)mScratchSize; +} + +size_t HSAILMachineFunctionInfo::getPrivateSize() { + if (mPrivateMemSize == -1) { + const DataLayout *DL = mMF->getTarget().getDataLayout(); + + mPrivateMemSize = 0; + SmallPtrSet thisFuncPvtVarsSet; + for (MachineFunction::const_iterator I = mMF->begin(), E = mMF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *LastMI = II; + for (unsigned int opNum = 0; opNum < LastMI->getNumOperands(); + opNum++) { + const MachineOperand &MO = LastMI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_GlobalAddress) { + if (const GlobalVariable *GV = + dyn_cast(MO.getGlobal())) { 
+ if (GV->getType()->getAddressSpace() == + HSAILAS::PRIVATE_ADDRESS) { + if (thisFuncPvtVarsSet.insert(GV).second) { + mPrivateMemSize += + DL->getTypeAllocSize(GV->getType()->getElementType()); + } + } + } + } + } + } + } + mPrivateMemSize = ((mPrivateMemSize + 15) & ~15); + } + return (uint32_t)mPrivateMemSize; +} + +size_t HSAILMachineFunctionInfo::getGroupSize() { + if (mGroupMemSize == -1) { + const DataLayout *DL = mMF->getTarget().getDataLayout(); + + mGroupMemSize = 0; + SmallPtrSet thisFuncGrpVarsSet; + for (MachineFunction::const_iterator I = mMF->begin(), E = mMF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *LastMI = II; + for (unsigned int opNum = 0; opNum < LastMI->getNumOperands(); + opNum++) { + const MachineOperand &MO = LastMI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_GlobalAddress) { + if (const GlobalVariable *GV = + dyn_cast(MO.getGlobal())) { + if (GV->getType()->getAddressSpace() == HSAILAS::GROUP_ADDRESS) { + if (thisFuncGrpVarsSet.insert(GV).second) { + mGroupMemSize += + DL->getTypeAllocSize(GV->getType()->getElementType()); + } + } + } + } + } + } + } + mGroupMemSize = ((mGroupMemSize + 15) & ~15); + } + return (uint32_t)mGroupMemSize; +} + +uint32_t HSAILMachineFunctionInfo::getStackSize() { + if (mStackSize == -1) { + uint32_t privSize = 0; + const MachineFrameInfo *MFI = mMF->getFrameInfo(); + privSize = MFI->getOffsetAdjustment() + MFI->getStackSize(); + const HSAILTargetMachine *TM = + reinterpret_cast(&mMF->getTarget()); + bool addStackSize = TM->getOptLevel() == CodeGenOpt::None; + Function::const_arg_iterator I = mMF->getFunction()->arg_begin(); + Function::const_arg_iterator Ie = mMF->getFunction()->arg_end(); + while (I != Ie) { + Type *curType = I->getType(); + ++I; + if (dyn_cast(curType)) { + Type *CT = dyn_cast(curType)->getElementType(); + if (CT->isStructTy() && + dyn_cast(curType)->getAddressSpace() == + HSAILAS::PRIVATE_ADDRESS) { + addStackSize = true; + } + } + } + if (addStackSize) { + privSize += getScratchSize(); + } + mStackSize = privSize; + } + return (uint32_t)mStackSize; +} + +// FIXME: Remove this +uint32_t HSAILMachineFunctionInfo::addi32Literal(uint32_t val, int Opcode) { + return mIntLits[val]; +} + +void HSAILMachineFunctionInfo::addErrorMsg(const char *msg, ErrorMsgEnum val) { + if (val == DEBUG_ONLY) { +#if defined(DEBUG) || defined(_DEBUG) + mErrors.insert(msg); +#endif + } else if (val == RELEASE_ONLY) { +#if !defined(DEBUG) && !defined(_DEBUG) + mErrors.insert(msg); +#endif + } else if (val == ALWAYS) { + mErrors.insert(msg); + } +} + +void HSAILMachineFunctionInfo::addMetadata(const char *md, bool kernelOnly) { + addMetadata(std::string(md), kernelOnly); +} + +void HSAILMachineFunctionInfo::addMetadata(std::string md, bool kernelOnly) { + if (kernelOnly) { + mMetadataKernel.push_back(md); + } else { + mMetadataFunc.insert(md); + } +} + +bool HSAILMachineFunctionInfo::isSignedIntType(const Value *ptr) { + if (!mSTM->supportMetadata30()) + return true; + std::string signedNames = "llvm.signedOrSignedpointee.annotations."; + std::string argName = ptr->getName(); + if (!mMF) + return false; + signedNames += mMF->getFunction()->getName(); + const GlobalVariable *GV = + mMF->getFunction()->getParent()->getGlobalVariable(signedNames); + if (!GV || !GV->hasInitializer()) + return false; + const ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return false; + for (uint32_t start = 0, stop = 
CA->getNumOperands(); start < stop; ++start) { + const ConstantExpr *nameField = + dyn_cast(CA->getOperand(start)); + if (!nameField) + continue; + + const GlobalVariable *nameGV = + dyn_cast(nameField->getOperand(0)); + if (!nameGV || !nameGV->hasInitializer()) + continue; + + const ConstantDataSequential *nameArray = + dyn_cast(nameGV->getInitializer()); + if (!nameArray) + continue; + + std::string nameStr = nameArray->getAsString(); + // We don't want to include the newline + if (!nameStr.compare(0, nameStr.length() - 1, argName)) + return true; + } + return false; +} +bool HSAILMachineFunctionInfo::isVolatilePointer(const Value *ptr) { + if (!mSTM->supportMetadata30()) + return false; + std::string signedNames = "llvm.volatilepointer.annotations."; + std::string argName = ptr->getName(); + if (!mMF) + return false; + signedNames += mMF->getFunction()->getName(); + const GlobalVariable *GV = + mMF->getFunction()->getParent()->getGlobalVariable(signedNames); + if (!GV || !GV->hasInitializer()) + return false; + const ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return false; + for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) { + const ConstantExpr *nameField = + dyn_cast(CA->getOperand(start)); + if (!nameField) + continue; + + const GlobalVariable *nameGV = + dyn_cast(nameField->getOperand(0)); + if (!nameGV || !nameGV->hasInitializer()) + continue; + + const ConstantDataSequential *nameArray = + dyn_cast(nameGV->getInitializer()); + if (!nameArray) + continue; + + std::string nameStr = nameArray->getAsString(); + // We don't want to include the newline + if (!nameStr.compare(0, nameStr.length() - 1, argName)) + return true; + } + return false; +} +bool HSAILMachineFunctionInfo::isRestrictPointer(const Value *ptr) { + if (!mSTM->supportMetadata30()) + return false; + std::string signedNames = "llvm.restrictpointer.annotations."; + std::string argName = ptr->getName(); + if (!mMF) + return false; + signedNames += mMF->getFunction()->getName(); + const GlobalVariable *GV = + mMF->getFunction()->getParent()->getGlobalVariable(signedNames); + if (!GV || !GV->hasInitializer()) + return false; + const ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return false; + for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) { + const ConstantExpr *nameField = + dyn_cast(CA->getOperand(start)); + if (!nameField) + continue; + + const GlobalVariable *nameGV = + dyn_cast(nameField->getOperand(0)); + if (!nameGV || !nameGV->hasInitializer()) + continue; + + const ConstantDataSequential *nameArray = + dyn_cast(nameGV->getInitializer()); + if (!nameArray) + continue; + + std::string nameStr = nameArray->getAsString(); + // We don't want to include the newline + if (!nameStr.compare(0, nameStr.length() - 1, argName)) + return true; + } + return false; +} + +bool HSAILMachineFunctionInfo::isConstantArgument(const Value *ptr) { + if (!mSTM->supportMetadata30()) + return false; + std::string signedNames = "llvm.argtypeconst.annotations."; + std::string argName = ptr->getName(); + if (!mMF) + return false; + signedNames += mMF->getFunction()->getName(); + const GlobalVariable *GV = + mMF->getFunction()->getParent()->getGlobalVariable(signedNames); + if (!GV || !GV->hasInitializer()) + return false; + const ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return false; + for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) { + const ConstantExpr *nameField = + 
dyn_cast(CA->getOperand(start)); + if (!nameField) + continue; + + const GlobalVariable *nameGV = + dyn_cast(nameField->getOperand(0)); + if (!nameGV || !nameGV->hasInitializer()) + continue; + + const ConstantDataSequential *nameArray = + dyn_cast(nameGV->getInitializer()); + if (!nameArray) + continue; + + std::string nameStr = nameArray->getAsString(); + // We don't want to include the newline + if (!nameStr.compare(0, nameStr.length() - 1, argName)) + return true; + } + return false; +} Index: lib/Target/HSAIL/HSAILMetadata.hpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMetadata.hpp @@ -0,0 +1,199 @@ +// +// taken from hsa/compiler/lib/include/aclTypes.h +// + +#ifndef _HSAIL_METADATA_HPP +#define _HSAIL_METADATA_HPP + +typedef struct _md_arg_type_0_7 argType; +typedef struct _md_printf_fmt_0_7 printfFmt; + +// Enumerations for the various argument types. +typedef enum argTypeEnum { + ARG_TYPE_ERROR = 0, + ARG_TYPE_SAMPLER = 1, + ARG_TYPE_IMAGE = 2, + ARG_TYPE_COUNTER = 3, + ARG_TYPE_VALUE = 4, + ARG_TYPE_POINTER = 5, + ARG_TYPE_SEMAPHORE = 6, + ARG_TYPE_QUEUE = 7, // enum for device enqueue + ARG_TYPE_LAST = 8 +} ArgType; + +// Enumerations of the valid data types for pass by value and +// pass by pointer kernel arguments. +typedef enum dataTypeEnum { + DATATYPE_ERROR = 0, + DATATYPE_i1 = 1, + DATATYPE_i8 = 2, + DATATYPE_i16 = 3, + DATATYPE_i32 = 4, + DATATYPE_i64 = 5, + DATATYPE_u8 = 6, + DATATYPE_u16 = 7, + DATATYPE_u32 = 8, + DATATYPE_u64 = 9, + DATATYPE_f16 = 10, + DATATYPE_f32 = 11, + DATATYPE_f64 = 12, + DATATYPE_f80 = 13, + DATATYPE_f128 = 14, + DATATYPE_struct = 15, + DATATYPE_union = 16, + DATATYPE_event = 17, + DATATYPE_opaque = 18, + DATATYPE_unknown = 19, + DATATYPE_LAST = 20 +} ArgDataType; + +// Enumerations of the valid memory types for pass by pointer +// kernel arguments +typedef enum memoryTypeEnum { + PTR_MT_ERROR = 0, // Error + PTR_MT_GLOBAL = 1, // global buffer + PTR_MT_SCRATCH_EMU = 2, // SW emulated private memory + PTR_MT_LDS_EMU = 3, // SW emulated local memory + PTR_MT_UAV = 4, // uniformed access vector memory + PTR_MT_CONSTANT_EMU = 5, // SW emulated constant memory + PTR_MT_GDS_EMU = 6, // SW emulated region memory + PTR_MT_LDS = 7, // HW local memory + PTR_MT_SCRATCH = 8, // HW private memory + PTR_MT_CONSTANT = 9, // HW constant memory + PTR_MT_GDS = 10, // HW region memory + PTR_MT_UAV_SCRATCH = 11, // SI and later HW private memory + PTR_MT_UAV_CONSTANT = 12, // SI and later HW constant memory + PTR_MT_LAST = 13 +} MemoryType; + +// Enumeration that specifies the various access types for a pointer/image. +typedef enum imageTypeEnum { + ACCESS_TYPE_ERROR = 0, + ACCESS_TYPE_RO = 1, + ACCESS_TYPE_WO = 2, + ACCESS_TYPE_RW = 3, + ACCESS_TYPE_LAST = 4 +} AccessType; + +//! An enumeration that maps memory types to index values +//! 
An enumeration that maps Resource type to index values +typedef enum _rt_gpu_resource_type_rec { + RT_RES_UAV = 0, // UAV resources + RT_RES_PRI = 1, // Private resources + RT_RES_LDS = 2, // LDS resources + RT_RES_GDS = 3, // GDS resources + RT_RES_CON = 4, // Constant resources + RT_RES_LAST = 5 +} aclGPUResource; + +typedef enum _rt_gpu_mem_sizes { + RT_MEM_HW_LOCAL = 0, + RT_MEM_SW_LOCAL = 1, + RT_MEM_HW_PRIVATE = 2, + RT_MEM_SW_PRIVATE = 3, + RT_MEM_HW_REGION = 4, + RT_MEM_SW_REGION = 5, + RT_MEM_LAST = 6 +} aclGPUMemSizes; + +typedef struct _md_arg_type_0_7 { + size_t struct_size; + size_t argNameSize; + size_t typeStrSize; + const char *argStr; + const char *typeStr; + union { + struct { // Struct for sampler arguments + unsigned ID; + unsigned isKernelDefined; + unsigned value; + } sampler; + struct { // Struct for image arguments + unsigned resID; + unsigned cbNum; + unsigned cbOffset; + AccessType type; + bool is2D; + bool is1D; + bool isArray; + bool isBuffer; + } image; + struct { // struct for atomic counter arguments + unsigned is32bit; + unsigned resID; + unsigned cbNum; + unsigned cbOffset; + } counter; + struct { // struct for semaphore arguments + unsigned resID; + unsigned cbNum; + unsigned cbOffset; + } sema; + struct { // struct for pass by value arguments + unsigned numElements; + unsigned cbNum; + unsigned cbOffset; + ArgDataType data; + } value; + struct { // struct for pass by pointer arguments + unsigned numElements; + unsigned cbNum; + unsigned cbOffset; + unsigned bufNum; + unsigned align; + ArgDataType data; + MemoryType memory; + AccessType type; + bool isVolatile; + bool isRestrict; + bool isPipe; + } pointer; + } arg; + ArgType type; + bool isConst; +} argType_0_7; + +//! A structure that holds information for printf +// The format in memory of this structure is +// ------------ +// | printfFmt| +// ------------ +// |->argSizes| +// ------------ +// |->fmrStr | +// ------------ + +typedef struct _md_printf_fmt_0_7 { + size_t struct_size; + unsigned ID; + size_t numSizes; + size_t fmtStrSize; + uint32_t *argSizes; + const char *fmtStr; +} printfFmt_0_7; + +//! A structure that holds the metadata in the RODATA section. +typedef struct _cl_metadata_0_7 { + size_t struct_size; // This holds the size of the structure itself for + // versioning. + size_t data_size; // This holds the size of all the memory allocated for this + // structure. + uint32_t major, minor, revision, gpuCaps, funcID; + uint32_t gpuRes[RT_RES_LAST]; + uint32_t wgs[3]; + uint32_t wrs[3]; + size_t kernelNameSize; + size_t deviceNameSize; + size_t mem[RT_MEM_LAST]; + size_t numArgs; + size_t numPrintf; + + argType *args; + printfFmt *printf; + const char *kernelName; + const char *deviceName; + bool enqueue_kernel; + uint32_t kernel_index; +} CLMetadata_0_7; + +#endif // _HSAIL_METADATA_HPP Index: lib/Target/HSAIL/HSAILModuleInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILModuleInfo.h @@ -0,0 +1,105 @@ +//==-- HSAILModuleInfo.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// This is an MMI implementation for HSAIL targets. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_MACHINE_MODULE_INFO_H_ +#define _HSAIL_MACHINE_MODULE_INFO_H_ + +#include "HSAIL.h" +#include "HSAILKernel.h" +#include "llvm/IR/Module.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace llvm { +class Argument; +class TypeSymbolTable; +class GlobalValue; +class MachineFunction; +class GlobalValue; + +class HSAILMachineFunctionInfo; +class HSAILModuleInfo : public MachineModuleInfoImpl { + + void parseEdgGlobalVariables(const Module *M, const HSAILTargetMachine *mTM); + +protected: + const MachineModuleInfo *mMMI; + +public: + HSAILModuleInfo(const MachineModuleInfo &); + virtual ~HSAILModuleInfo(); + + void processModule(const Module *MF, const HSAILTargetMachine *mTM); + + /// Process the given module and parse out the global variable metadata passed + /// down from the frontend-compiler + + /// Returns true if the image ID corresponds to a read only image. + bool isReadOnlyImage(StringRef Name, uint32_t iID) const; + + /// Returns true if the image ID corresponds to a write only image. + bool isWriteOnlyImage(StringRef Name, uint32_t iID) const; + + /// Returns true if the image ID corresponds to a read write image. + bool isReadWriteImage(StringRef name, uint32_t iID) const; + + /// Get a reference to the kernel metadata information for the given function + /// name. + HSAILKernel *getKernel(StringRef Name); + + /// Query if the constant argument uses hardware or not + bool usesHWConstant(const HSAILKernel *krnl, StringRef Arg); + + /// Query the constant buffer number for a constant pointer. + uint32_t getConstPtrCB(const HSAILKernel *krnl, StringRef Arg); + + /// Get the unique function ID for the specific function name and create a new + /// unique ID if it is not found. + uint32_t getOrCreateFunctionID(const GlobalValue *func); + uint32_t getOrCreateFunctionID(const std::string &func); + + void add_printf_offset(uint32_t offset) { mPrintfOffset += offset; } + uint32_t get_printf_offset() { return mPrintfOffset; } + +public: + StringMap mKernels; + +private: + StringMap mKernelArgs; + StringMap mArrayMems; + StringMap mFuncNames; + DenseMap mFuncPtrNames; + DenseMap mImageNameMap; + StringMap> mSamplerSet; + std::set mByteStore; + std::set mIgnoreStr; + DenseMap mArgIDMap; + const char *symTab; + const HSAILSubtarget *mSTM; + size_t mOffset; + uint32_t mReservedBuffs; + uint32_t mCurrentCPOffset; + uint32_t mPrintfOffset; + bool mProcessed; +}; +} // end namespace llvm + +#endif // _HSAIL_COFF_MACHINE_MODULE_INFO_H_ Index: lib/Target/HSAIL/HSAILModuleInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILModuleInfo.cpp @@ -0,0 +1,121 @@ +//===-- HSAILModuleInfo.cpp -----------------------------------------------===// + +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "HSAILModuleInfo.h" +#include "HSAILTargetMachine.h" + +using namespace llvm; + +HSAILModuleInfo::HSAILModuleInfo(const MachineModuleInfo &MMI) { + mMMI = &MMI; + mOffset = 0; + mReservedBuffs = 0; + symTab = nullptr; + mCurrentCPOffset = 0; + mPrintfOffset = 0; + mProcessed = false; +} + +HSAILModuleInfo::~HSAILModuleInfo() { + for (StringMap::iterator kb = mKernels.begin(), + ke = mKernels.end(); + kb != ke; ++kb) { + HSAILKernel *ptr = kb->getValue(); + delete ptr; + } +} + +static const HSAILConstPtr *getConstPtr(const HSAILKernel *krnl, + const std::string &arg) { + if (!krnl) { + return nullptr; + } + SmallVector::const_iterator begin, end; + for (begin = krnl->constPtr.begin(), end = krnl->constPtr.end(); begin != end; + ++begin) { + if (!strcmp(begin->name.data(), arg.c_str())) { + return &(*begin); + } + } + return nullptr; +} + +void HSAILModuleInfo::processModule(const Module *M, + const HSAILTargetMachine *mTM) { + mSTM = mTM->getSubtargetImpl(); + if (mProcessed) { + return; + } + + // Make sure we only process the module once even though this function + // is called everytime a MachineFunctionInfo object is instantiated. + mProcessed = true; +} + +HSAILKernel *HSAILModuleInfo::getKernel(StringRef name) { + StringMap::iterator iter = mKernels.find(name); + if (iter == mKernels.end()) { + return nullptr; + } else { + return iter->second; + } +} + +bool HSAILModuleInfo::isWriteOnlyImage(StringRef name, uint32_t iID) const { + const StringMap::const_iterator kiter = mKernels.find(name); + if (kiter == mKernels.end()) { + return false; + } + return kiter->second->writeOnly.count(iID); +} + +bool HSAILModuleInfo::isReadOnlyImage(StringRef name, uint32_t iID) const { + const StringMap::const_iterator kiter = mKernels.find(name); + if (kiter == mKernels.end()) { + return false; + } + return kiter->second->readOnly.count(iID); +} + +bool HSAILModuleInfo::isReadWriteImage(StringRef name, uint32_t iID) const { + const StringMap::const_iterator kiter = mKernels.find(name); + if (kiter == mKernels.end()) { + return false; + } + return kiter->second->readWrite.count(iID); +} + +bool HSAILModuleInfo::usesHWConstant(const HSAILKernel *krnl, StringRef arg) { + const HSAILConstPtr *curConst = getConstPtr(krnl, arg); + if (!curConst) { + return false; + } + return curConst->usesHardware; +} + +uint32_t HSAILModuleInfo::getConstPtrCB(const HSAILKernel *krnl, + StringRef Arg) { + const HSAILConstPtr *curConst = getConstPtr(krnl, Arg); + if (!curConst) { + return 0; + } + return curConst->cbNum; +} + +uint32_t HSAILModuleInfo::getOrCreateFunctionID(const std::string &func) { + uint32_t id; + if (mFuncNames.find(func) == mFuncNames.end()) { + id = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size(); + mFuncNames[func] = id; + } else { + id = mFuncNames[func]; + } + return id; +} Index: lib/Target/HSAIL/HSAILNodes.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILNodes.td @@ -0,0 +1,152 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +def HSAILDTIntTernaryOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> +]>; + +def HSAILDTBitExtractOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisSameAs<2, 3>, SDTCisInt<2> +]>; + +def HSAILLdExpOp : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>] +>; + +def HSAILActiveLanePermuteOp : SDTypeProfile<1, 5, + [SDTCisInt<0>, SDTCisInt<1>, + SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisSameAs<0, 4>, SDTCisInt<5>] +>; + +def HSAILActiveLaneIdOp : SDTypeProfile<1, 1, + [SDTCisInt<0>, SDTCisInt<1>] +>; + +def HSAILActiveLaneCountOp : SDTypeProfile<1, 2, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>] +>; + +def HSAILActiveLaneMaskOp : SDTypeProfile<4, 2, + [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, + SDTCisInt<4>, SDTCisInt<5>] +>; + +def HSAILFPClassOp : SDTypeProfile<1, 2, + [SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>] +>; + +def HSAILLdaOp : SDTypeProfile<1, 2, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>] +>; + +// i1 = segment, i1:nonull, ptr +def HSAILSegmentPOp : SDTypeProfile<1, 3, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisInt<3>] +>; + +def HSAILArgLdOp : SDTypeProfile<1, 4, + [SDTCisInt<1>, SDTCisInt<2>, SDTCisSameAs<2, 3>] +>; + +def HSAILArgStOp : SDTypeProfile<0, 4, + [SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>] +>; + +//===----------------------------------------------------------------------===// +// Call/Return DAG Nodes +//===----------------------------------------------------------------------===// +def IL_callseq_start : SDNode<"ISD::CALLSEQ_START", SDTIL_CallSeqStart, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; + +def IL_callseq_end : SDNode<"ISD::CALLSEQ_END", SDTIL_CallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPSideEffect]>; + +def HSAILret : SDNode<"HSAILISD::RET", SDTNone, + [SDNPHasChain, SDNPOptInGlue] +>; + +def HSAILArgLd : SDNode<"HSAILISD::ARG_LD", HSAILArgLdOp, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue] +>; + +def HSAILArgSt : SDNode<"HSAILISD::ARG_ST", HSAILArgStOp, + [SDNPHasChain, SDNPMayStore, SDNPOutGlue, SDNPInGlue] +>; + +def HSAILlda : SDNode<"HSAILISD::LDA", HSAILLdaOp>; + +// out = a - floor(a) +def HSAILfract : SDNode<"HSAILISD::FRACT", SDTFPUnaryOp>; + +def HSAILnfma : SDNode<"HSAILISD::NFMA", SDTFPTernaryOp>; +def HSAILumad : SDNode<"HSAILISD::UMAD", HSAILDTIntTernaryOp>; +def HSAILsmad : SDNode<"HSAILISD::SMAD", HSAILDTIntTernaryOp>; +def HSAILbitselect : SDNode<"HSAILISD::BITSELECT", HSAILDTIntTernaryOp>; +def HSAILsbitextract : SDNode<"HSAILISD::SBITEXTRACT", HSAILDTBitExtractOp>; +def HSAILubitextract : SDNode<"HSAILISD::UBITEXTRACT", HSAILDTBitExtractOp>; + +// out = max(a, b) a and b are signed ints +def HSAILsmax : SDNode<"HSAILISD::SMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = max(a, b) a and b are unsigned ints +def HSAILumax : SDNode<"HSAILISD::UMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a and b are signed ints +def HSAILsmin : SDNode<"HSAILISD::SMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a and b are unsigned ints +def HSAILumin : SDNode<"HSAILISD::UMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when +// performing the mulitply. The result is a 32-bit value. 
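+// (A rough model, not normative: only the low 24 bits of each operand
+// participate, i.e. mul24(a, b) behaves like ((a << 8) >> 8) * ((b << 8) >> 8)
+// truncated to 32 bits, using an arithmetic shift for the signed form and a
+// logical shift for the unsigned form.)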
+def HSAILumul24 : SDNode<"HSAILISD::UMUL24", SDTIntBinOp, + [SDNPCommutative] +>; +def HSAILsmul24 : SDNode<"HSAILISD::SMUL24", SDTIntBinOp, + [SDNPCommutative] +>; + +def HSAILumad24 : SDNode<"HSAILISD::UMAD24", HSAILDTIntTernaryOp, + [] +>; +def HSAILsmad24 : SDNode<"HSAILISD::SMAD24", HSAILDTIntTernaryOp, + [] +>; + +def HSAILfldexp : SDNode<"HSAILISD::FLDEXP", HSAILLdExpOp>; + +def HSAILactivelanepermute : SDNode<"HSAILISD::ACTIVELANEPERMUTE", + HSAILActiveLanePermuteOp, [SDNPHasChain, SDNPSideEffect] +>; + +def HSAILactivelaneid : SDNode<"HSAILISD::ACTIVELANEID", + HSAILActiveLaneIdOp, [SDNPHasChain, SDNPSideEffect] +>; + +def HSAILactivelanecount : SDNode<"HSAILISD::ACTIVELANECOUNT", + HSAILActiveLaneCountOp, [SDNPHasChain, SDNPSideEffect] +>; + +def HSAILactivelanemask : SDNode<"HSAILISD::ACTIVELANEMASK", + HSAILActiveLaneMaskOp, [SDNPHasChain, SDNPSideEffect] +>; + +def HSAILclass : SDNode<"HSAILISD::CLASS", HSAILFPClassOp>; + +def HSAILkernargbaseptr : SDNode<"HSAILISD::KERNARGBASEPTR", SDTIntLeaf>; + +def HSAILsegmentp : SDNode<"HSAILISD::SEGMENTP", HSAILSegmentPOp>; Index: lib/Target/HSAIL/HSAILOpaqueTypes.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILOpaqueTypes.h @@ -0,0 +1,68 @@ +//===-- HSAILOpaqueTypes.h - SPIR opaque types ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// \brief This file declares the API for working with SPIR opaque +/// types. This includes images and samplers among other things. +// +//===----------------------------------------------------------------------===// + +#ifndef __HSAIL_OPAQUE_TYPES_H__ +#define __HSAIL_OPAQUE_TYPES_H__ + +namespace llvm { + +class Type; + +enum OpaqueType { + NotOpaque, + I1D, + I1DA, + I1DB, + I2D, + I2DA, + I3D, + I2DDepth, + I2DADepth, + C32, + C64, + Sema, + Sampler, + Event, + ReserveId, + CLKEventT, + QueueT, + UnknownOpaque +}; + +OpaqueType GetOpaqueType(const Type *T); + +inline bool IsImage(OpaqueType OT) { + switch (OT) { + default: + return false; + case I1D: + case I1DA: + case I1DB: + case I2D: + case I2DA: + case I3D: + case I2DDepth: + case I2DADepth: + return true; + } +} + +inline bool IsImage(const Type *T) { return IsImage(GetOpaqueType(T)); } + +inline bool IsSampler(const Type *T) { return GetOpaqueType(T) == Sampler; } +} // end namespace llvm + +#endif Index: lib/Target/HSAIL/HSAILOpaqueTypes.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILOpaqueTypes.cpp @@ -0,0 +1,69 @@ +//===-- HSAILOpaqueTypes.cpp - SPIR opaque types --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// \brief This file implements the API for working with SPIR opaque +/// types. This includes images and samplers among other things. +// +//===----------------------------------------------------------------------===// + +#include "HSAILOpaqueTypes.h" + +#include +#include + +using namespace llvm; + +/// \brief Check for an opaque type. 
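+///
+/// Resolution order, matching the code below: a null type is NotOpaque; a
+/// pointer is looked through one level to its pointee struct; anything that
+/// is not an opaque (body-less) struct is NotOpaque; otherwise the struct
+/// name is matched against both the SPIR ("opencl.*") and the EDG
+/// ("struct._*") spellings, with UnknownOpaque as the fallback.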
+OpaqueType llvm::GetOpaqueType(const Type *T) { + // Handle the degenerate case first. + if (!T) + return NotOpaque; + + const StructType *ST = dyn_cast(T); + + // If the type is not a struct, check if it is a pointer and try to + // extract a struct from there. + if (!ST) { + const PointerType *PT = dyn_cast(T); + + // Not a struct, not a pointer. It can't be opaque. + if (!PT) + return NotOpaque; + + const Type *CT = PT->getElementType(); + ST = dyn_cast(CT); + } + + if (!ST || !ST->isOpaque()) + return NotOpaque; + + return StringSwitch(ST->getName()) + .Cases("opencl.image1d_t", "struct._image1d_t", I1D) + .Cases("opencl.image1d_array_t", "struct._image1d_array_t", I1DA) + .Cases("opencl.image1d_buffer_t", "struct._image1d_buffer_t", I1DB) + .Cases("opencl.image2d_t", "struct._image2d_t", I2D) + .Cases("opencl.image2d_array_t", "struct._image2d_array_t", I2DA) + .Cases("opencl.image3d_t", "struct._image3d_t", I3D) + .Cases("opencl.image2d_depth_t", "struct._image2d_depth_t", I2DDepth) + .Cases("opencl.image2d_array_depth_t", "struct._image2d_array_depth_t", + I2DADepth) + // There is no opaque sampler type in SPIR. The i32 in SPIR is + // lowered to the EDG-stype opaque sampler type. + .Case("struct._sampler_t", Sampler) + .Cases("opencl.event_t", "struct._event_t", Event) + .Case("struct._counter32_t", C32) + .Case("struct._counter64_t", C64) + .Case("struct._sema_t", Sema) + .Case("opencl.reserve_id_t", ReserveId) + .Case("opencl.clk_event_t", CLKEventT) + .Case("opencl.queue_t", QueueT) + .Default(UnknownOpaque); +} Index: lib/Target/HSAIL/HSAILOperands.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILOperands.td @@ -0,0 +1,86 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// Custom memory operands +//===----------------------------------------------------------------------===// +def PtrRC: Operand, PointerLikeRegClass<0>; +def PtrRC32: Operand, PointerLikeRegClass<32>; + +// Memory operand: base, register and offset. +// +// FIXME: iPTR is fundamentally broken for address spaces, should use +// something else. 
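+//
+// The three sub-operands mirror how the backend materializes addresses
+// elsewhere (see the ST_U32/LD_U32 sequences in HSAILRegisterInfo.cpp): a
+// symbolic or global base, an optional register, and an immediate byte
+// offset.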
+def MEMOP : Operand { + let MIOperandInfo = (ops iAny:$base, iAny:$reg, iAny:$offset); + let PrintMethod = "printAddrMode3Op"; +} + +def Vec2DestOp32 : Operand { + let MIOperandInfo = (ops HSAILDest32Operand, HSAILDest32Operand); + let PrintMethod = "printVec2Op"; +} + +def Vec3DestOp32 : Operand { + let MIOperandInfo = (ops HSAILDest32Operand, HSAILDest32Operand, HSAILDest32Operand); + let PrintMethod = "printVec3Op"; +} + +def Vec4DestOp32 : Operand { + let MIOperandInfo = (ops HSAILDest32Operand, HSAILDest32Operand, HSAILDest32Operand, HSAILDest32Operand); + let PrintMethod = "printVec4Op"; +} + + +def Vec2DestOp64 : Operand { + let MIOperandInfo = (ops HSAILDest64Operand, HSAILDest64Operand); + let PrintMethod = "printVec2Op"; +} + +def Vec3DestOp64 : Operand { + let MIOperandInfo = (ops HSAILDest64Operand, HSAILDest64Operand, HSAILDest64Operand); + let PrintMethod = "printVec3Op"; +} + +def Vec4DestOp64 : Operand { + let MIOperandInfo = (ops HSAILDest64Operand, HSAILDest64Operand, HSAILDest64Operand, HSAILDest64Operand); + let PrintMethod = "printVec4Op"; +} + + +def calltarget : Operand; + +def GPROrImm : ComplexPattern; + +def ftz : Operand { + let PrintMethod = "printFTZ"; +} + +def nonull : Operand { + let PrintMethod = "printNoNull"; +} + +def equiv : Operand { + let PrintMethod = "printEquiv"; +} + +def v4mod : Operand { + let PrintMethod = "printV4"; +} + +def ArgDeclAlignment : Operand { + let OperandType = "OPERAND_IMMEDIATE"; + let PrintMethod = "printArgDeclAlignment"; +} + +def ArraySize : Operand { + let OperandType = "OPERAND_IMMEDIATE"; + let PrintMethod = "printArraySize"; +} Index: lib/Target/HSAIL/HSAILParamManager.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILParamManager.h @@ -0,0 +1,148 @@ +//===- HSAILParamManager.h - kernel/function arguments -----------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \ file +/// This file defines the HSAILParamManager class, which manages all defined +/// .param variables for a particular function. +// +//===----------------------------------------------------------------------===// + +#ifndef HSAIL_PARAM_MANAGER_H +#define HSAIL_PARAM_MANAGER_H + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Type.h" + +namespace llvm { + +/// HSAILParamManager - This class manages all parameter variables defined for a +/// particular function. 
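+///
+/// Typical use when lowering a function's formals (a sketch only; the real
+/// call sites live in the HSAIL lowering code, and PM, DL, Mang and Arg are
+/// assumed names):
+///
+///   HSAILParamManager PM(&DL);
+///   unsigned Idx = PM.addArgumentParam(AS, Arg,
+///                      HSAILParamManager::mangleArg(&Mang, Arg.getName()));
+///   unsigned Off  = PM.getParamOffset(Idx); // byte offset within its segment
+///   unsigned Size = PM.getParamSize(Idx);   // DataLayout store size, in bytes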
+class HSAILParamManager { +private: + /// HSAILParamType - Type of a kernarg/arg/call param variable + enum HSAILParamType { + HSAIL_PARAM_TYPE_KERNARG, + HSAIL_PARAM_TYPE_ARGUMENT, + HSAIL_PARAM_TYPE_RETURN, + HSAIL_PARAM_TYPE_CALL_PARAM, + HSAIL_PARAM_TYPE_CALL_RET + }; + + /// HSAILParam - Definition of a HSAIL kernarg/arg variable + struct HSAILParam { + HSAILParamType Type; + unsigned Offset; // Parameter offset in its segment + const Argument *Arg; // Original function argument if any + }; + + DenseMap AllParams; + DenseMap ParamNames; + DenseMap ParamTypes; + SmallVector ArgumentParams; + SmallVector ReturnParams; + SmallVector CallArgParams; + SmallVector CallRetParams; + + unsigned addParam(HSAILParamType ParamType, Type *Ty, + const StringRef ParamName); + + const DataLayout *DL; + +public: + typedef DenseMap::const_iterator names_iterator; + typedef SmallVector::const_iterator param_iterator; + + HSAILParamManager(const DataLayout *_DL) : DL(_DL){}; + ~HSAILParamManager(); + + param_iterator arg_begin() const { return ArgumentParams.begin(); } + param_iterator arg_end() const { return ArgumentParams.end(); } + param_iterator ret_begin() const { return ReturnParams.begin(); } + param_iterator ret_end() const { return ReturnParams.end(); } + param_iterator call_arg_begin() const { return CallArgParams.begin(); } + param_iterator call_arg_end() const { return CallArgParams.end(); } + param_iterator call_ret_begin() const { return CallRetParams.begin(); } + param_iterator call_ret_end() const { return CallRetParams.end(); } + + /// addArgumentParam - Returns a new variable used as an argument. + /// AS is an address space of the argument. + unsigned addArgumentParam(unsigned AS, const Argument &Arg, + const StringRef ParamName); + + /// addReturnParam - Returns a new variable used as a return argument. + unsigned addReturnParam(Type *Ty, const StringRef ParamName); + + /// addCallArgParam - Returns a new variable used as a call actual argument. + unsigned addCallArgParam(Type *Ty, const StringRef ParamName); + + /// addCallRetParam - Returns a new variable used as a call actual return + /// argument. + unsigned addCallRetParam(Type *Ty, const StringRef ParamName); + + /// addParamName - Saves a persistent copy of Param Name + void addParamName(std::string Name, unsigned Index); + + /// addParamType - Saves the type of the parameter + void addParamType(Type *pTy, unsigned Index); + + /// getParamName - Returns the name of the parameter as a string. + const char *getParamName(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return ParamNames.find(Param)->second; + } + + /// getParamType - Returns the type of the parameter + Type *getParamType(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return ParamTypes.find(Param)->second; + } + + /// getParamSize - Returns the size of the parameter in bits. + unsigned getParamSize(unsigned Param) const { + return DL->getTypeStoreSize(getParamType(Param)); + } + + /// getParamOffset - Returns an offset of the parameter in its segment if + /// available, or UINT_MAX if unknown. + unsigned getParamOffset(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Offset; + } + + /// getParamOffset - Returns an offset of the parameter in its segment if + /// available, or UINT_MAX if unknown. 
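+  /// getParamArg - Returns the original IR Argument associated with the
+  /// parameter, or null if it has none (return values and call parameters
+  /// are created without one).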
+ const Argument *getParamArg(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Arg; + } + + /// Return parameter by its offset. + /// Offset is updated to refer to the parameter base address. + /// If parameter is not found returns UINT_MAX. + unsigned getParamByOffset(unsigned &Offset) const; + + unsigned getParamByOffset(int64_t &Offset) const { + if (Offset >= UINT_MAX || Offset < 0) + return UINT_MAX; + unsigned o = (unsigned)Offset; + unsigned r = getParamByOffset(o); + Offset = o; + return r; + } + + /// returns a unique argument name. + static std::string mangleArg(Mangler *Mang, const StringRef argName); +}; +} + +#endif Index: lib/Target/HSAIL/HSAILParamManager.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILParamManager.cpp @@ -0,0 +1,167 @@ +//=== HSAILParamManager.cpp - kernel/function arguments ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HSAILParamManager class. +// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "HSAILParamManager.h" +#include "HSAILOpaqueTypes.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Mangler.h" +#include + +using namespace llvm; + +HSAILParamManager::~HSAILParamManager() { + // Special handling for teardown of ParamNames + for (names_iterator I = ParamNames.begin(), E = ParamNames.end(); I != E; + ++I) { + // Delete malloc'ed name strings + free(I->second); + } + ParamNames.clear(); +} + +unsigned HSAILParamManager::addParam(HSAILParamType ParamType, Type *Ty, + const StringRef ParamName) { + HSAILParam Param; + Param.Type = ParamType; + Param.Arg = nullptr; + SmallVector *ParamList = 0; + const char *DefName = 0; + + std::string Name; + + switch (ParamType) { + case HSAIL_PARAM_TYPE_KERNARG: + case HSAIL_PARAM_TYPE_ARGUMENT: + ParamList = &ArgumentParams; + DefName = "__arg_p"; + break; + case HSAIL_PARAM_TYPE_RETURN: + ParamList = &ReturnParams; + DefName = "__ret_"; + break; + case HSAIL_PARAM_TYPE_CALL_PARAM: + ParamList = &CallArgParams; + DefName = "__param_"; + break; + case HSAIL_PARAM_TYPE_CALL_RET: + ParamList = &CallRetParams; + DefName = "__ret_"; + break; + } + + if (ParamName.empty()) { + Name = DefName; + Name += utostr(ParamList->size()); + } else { + Name = ParamName; + } + + unsigned prev_offset = 0; + unsigned prev_size = 0; + if (ParamList->size() > 0) { + unsigned prev_param = (*ParamList)[ParamList->size() - 1]; + prev_offset = getParamOffset(prev_param); + prev_size = getParamSize(prev_param); + } + if (prev_offset == UINT_MAX || GetOpaqueType(Ty)) { + Param.Offset = UINT_MAX; + } else { + unsigned alignment = DL->getABITypeAlignment(Ty); + // W/a for RT alignment of vectors to element size: + if (ParamType == HSAIL_PARAM_TYPE_KERNARG && Ty->isVectorTy()) + alignment = DL->getABITypeAlignment(Ty->getVectorElementType()); + assert(alignment != 0); + Param.Offset = (prev_offset + prev_size + alignment - 1) & ~(alignment - 1); + } + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ParamList->push_back(Index); + + addParamName(Name, Index); + 
addParamType(Ty, Index); + + return Index; +} + +unsigned HSAILParamManager::addArgumentParam(unsigned AS, const Argument &Arg, + const StringRef ParamName) { + unsigned Param = + addParam((AS == HSAILAS::ARG_ADDRESS) ? HSAIL_PARAM_TYPE_ARGUMENT + : HSAIL_PARAM_TYPE_KERNARG, + Arg.getType(), ParamName); + AllParams.find(Param)->second.Arg = &Arg; + return Param; +} + +unsigned HSAILParamManager::addReturnParam(Type *Ty, + const StringRef ParamName) { + return addParam(HSAIL_PARAM_TYPE_RETURN, Ty, ParamName); +} + +unsigned HSAILParamManager::addCallArgParam(Type *Ty, + const StringRef ParamName) { + return addParam(HSAIL_PARAM_TYPE_CALL_PARAM, Ty, ParamName); +} + +unsigned HSAILParamManager::addCallRetParam(Type *Ty, + const StringRef ParamName) { + return addParam(HSAIL_PARAM_TYPE_CALL_RET, Ty, ParamName); +} + +void HSAILParamManager::addParamName(std::string Name, unsigned Index) { + // malloc arg name string so that it persists through compilation + char *name = (char *)malloc(Name.length() + 1); + strcpy(name, Name.c_str()); + ParamNames[Index] = name; +} + +void HSAILParamManager::addParamType(Type *pTy, unsigned Index) { + ParamTypes[Index] = pTy; +} + +unsigned HSAILParamManager::getParamByOffset(unsigned &Offset) const { + unsigned param_no = ArgumentParams.size(); + for (unsigned i = 0; i < param_no; i++) { + unsigned param = ArgumentParams[i]; + unsigned o = getParamOffset(param); + if (o == UINT_MAX) + break; + if ((o <= Offset) && ((o + getParamSize(param)) > Offset)) { + // Parameter found and addressing is in bound. + Offset -= o; + return param; + } + } + return UINT_MAX; +} + +/// returns a unique argument name for flattened vector component. +std::string HSAILParamManager::mangleArg(Mangler *Mang, + const StringRef argName) { + if (argName.empty()) + return ""; + + std::string NameStrStorage; + + { + raw_string_ostream NameStr(NameStrStorage); + Mang->getNameWithPrefix(NameStr, argName); + } + + return std::move(NameStrStorage); +} Index: lib/Target/HSAIL/HSAILPatterns.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILPatterns.td @@ -0,0 +1,24 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Store pattern fragments. +// +//===----------------------------------------------------------------------===// + + +def ADDR : ComplexPattern; + +// FIXME: This should really be a complex pattern on the root load, +// but there seem to be problems trying to use a ComplexPattern with a +// chain in a Pat. 
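+//
+// Until that is resolved, a separate address ComplexPattern is kept per
+// memory-operation kind (load, store, atomic); each one is expected to
+// produce the (base, reg, offset) triple consumed by the MEMOP operand in
+// HSAILOperands.td.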
+def LoadAddr : ComplexPattern; +def StoreAddr : ComplexPattern; +def AtomicAddr : ComplexPattern; + +def SetCCPat : ComplexPattern; Index: lib/Target/HSAIL/HSAILProfiles.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILProfiles.td @@ -0,0 +1,14 @@ +//===---------------- HSAILILProfiles.td - HSAIL Profiles -----------------===// +// These are used for custom selection dag type profiles +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Call Sequence Profiles +//===----------------------------------------------------------------------===// +def SDTIL_CallSeqStart : SDCallSeqStart<[ + SDTCisVT<0, i32> + ]>; +def SDTIL_CallSeqEnd : SDCallSeqEnd<[ + SDTCisVT<0, i32>, SDTCisVT<1, i32> + ]>; + Index: lib/Target/HSAIL/HSAILRegisterInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILRegisterInfo.h @@ -0,0 +1,76 @@ +//=- HSAILRegisterInfo.h - HSAIL Register Information Impl --------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the HSAIL implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_REGISTER_INFO_H_ +#define _HSAIL_REGISTER_INFO_H_ + +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "HSAILGenRegisterInfo.inc" + +namespace llvm { +class Type; +class TargetInstrInfo; +class HSAILSubtarget; + +/// DWARFFlavour - Flavour of dwarf regnumbers +/// +namespace DWARFFlavour { +enum { HSAIL_Generic = 0 }; +} + +class HSAILRegisterInfo : public HSAILGenRegisterInfo { +private: + HSAILSubtarget &ST; + + void lowerSpillB1(MachineBasicBlock::iterator II, int FrameIndex) const; + void lowerRestoreB1(MachineBasicBlock::iterator II, int FrameIndex) const; + +public: + HSAILRegisterInfo(HSAILSubtarget &st); + + const uint16_t * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + + BitVector getRegsAvailable(const TargetRegisterClass *RC) const; + + BitVector getReservedRegs(const MachineFunction &MF) const override; + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; + + bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const override; + + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + unsigned getFrameRegister(const MachineFunction &MF) const override; + + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const override; + + const TargetRegisterClass *getPhysRegClass(unsigned Reg) const; +}; + +} // End llvm namespace + +#endif // _HSAIL_REGISTER_INFO_H_ Index: lib/Target/HSAIL/HSAILRegisterInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILRegisterInfo.cpp @@ -0,0 +1,334 @@ +//===- 
HSAILRegisterInfo.cpp - HSAIL Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the HSAIL implementation of the TargetRegisterInfo class. +// This file is responsible for the frame pointer elimination optimization +// on HSAIL. +// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILRegisterInfo.h" +#include "HSAILMachineFunctionInfo.h" +#include "HSAILSubtarget.h" +#include "HSAILTargetMachine.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +//#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "hsail-reginfo" + +using namespace llvm; + +static cl::opt HSAILReg32PressureLimit( + "hsail-reg32-pressure-limit", cl::Hidden, cl::init(24), + cl::desc("Register pressure limit for 32 bit HSAIL registers")); + +static cl::opt HSAILReg64PressureLimit( + "hsail-reg64-pressure-limit", cl::Hidden, cl::init(18), + cl::desc("Register pressure limit for 64 bit HSAIL registers")); + +static cl::opt HSAILRegSlots( + "hsail-reg-slots", cl::Hidden, cl::init(0), + cl::desc("A number of 64-bit slots allocated for $s registers")); + +HSAILRegisterInfo::HSAILRegisterInfo(HSAILSubtarget &st) + : HSAILGenRegisterInfo(0, 0), ST(st) {} + +const uint16_t * +HSAILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const uint16_t CalleeSavedRegs[] = {0}; + return CalleeSavedRegs; +} + +BitVector +HSAILRegisterInfo::getRegsAvailable(const TargetRegisterClass *RC) const { + BitVector Mask(getNumRegs()); + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; + ++I) + Mask.set(*I); + return Mask; +} + +BitVector HSAILRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + + // We can have up to 128 s-registers, but we should have (s + 2*d + 4*q) <= + // 128. + // Let's calulate the number of 32 and 64 bit VRs used in the function + // and partition register file accordingly. + HSAILMachineFunctionInfo *MFI = const_cast( + MF.getInfo()); + unsigned NumSlotsTotal = HSAIL::GPR64RegClass.getNumRegs(); + // Default register file partitioning 64 s-regs + 32 d-regs, RegSlots = 32. 
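+  // Worked example with the current register file (128 $s and 64 $d are
+  // defined, so NumSlotsTotal == 64): the default RegSlots of 32 exposes
+  // 64 $s plus 32 $d registers; the clamping below keeps at least 16 $s
+  // (RegSlots >= 8) and at least 8 $d (RegSlots <= NumSlotsTotal - 8).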
+ unsigned RegSlots = NumSlotsTotal / 2; + + // First query for this function, calculate register use + if (MFI->getRegisterPartitioning() == 0) { + const MachineRegisterInfo &RI = MF.getRegInfo(); + unsigned rc32 = 0, rc64 = 0; + for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) { + switch (RI.getRegClass(index2VirtReg(i))->getSize()) { + case 4: + rc32++; + break; + case 8: + rc64++; + break; + } + } + + if (HSAILRegSlots > 0) { + RegSlots = HSAILRegSlots; + } else { + // Calculate register file partitioning. We have 64 allocatable slots + // which + // are either 1 d-register or a pair of s-registers. 8 slots are reserved + // for 16 s-registers $s0..$s15, 8 are for 8 d-registers $d0..$d7. + // Default partitioning is 64 s-registers + 32 d-registers, which is + // RegSlots = 32 + + // If we have a small amount of 64 bit VRs, but high 32 bit register + // pressure reallocate slots to decrease 64 bit registers + if (rc64 < (NumSlotsTotal - RegSlots) && rc32 > (RegSlots * 2)) { + RegSlots = NumSlotsTotal - rc64; + } + // The opposite situation, we have a small demand on 32 bit registers but + // high pressure for 64 bit + else if (rc32 < (RegSlots * 2) && rc64 > (NumSlotsTotal - RegSlots)) { + RegSlots = (rc32 + 1) / 2; + } + } + + // Always preserve room for at least 16 s-registers and 8 d-registers + if (RegSlots < 8) + RegSlots = 8; + else if (RegSlots > (NumSlotsTotal - 8)) + RegSlots = NumSlotsTotal - 8; + + MFI->setRegisterPartitioning(RegSlots); + DEBUG(dbgs() << "\nFunction: " << MF.getFunction()->getName() + << " VR count: 32 bit = " << rc32 << ", 64 bit = " << rc64 + << ", register file partitioning: " << RegSlots * 2 << " $s + " + << NumSlotsTotal - RegSlots << " $d\n\n"); + } else { + RegSlots = MFI->getRegisterPartitioning(); + } + + unsigned Reg; + unsigned LastSReg = HSAIL::S0 + HSAIL::GPR32RegClass.getNumRegs() - 1; + for (Reg = HSAIL::S0 + RegSlots * 2; Reg <= LastSReg; ++Reg) { + Reserved.set(Reg); + } + unsigned LastDReg = HSAIL::D0 + HSAIL::GPR64RegClass.getNumRegs() - 1; + for (Reg = HSAIL::D0 + (NumSlotsTotal - RegSlots); Reg <= LastDReg; ++Reg) { + Reserved.set(Reg); + } + + return Reserved; +} + +bool HSAILRegisterInfo::trackLivenessAfterRegAlloc( + const MachineFunction &MF) const { + // TODO: Only enable when post-RA scheduling is enabled and this is needed. + return true; +} + +bool HSAILRegisterInfo::requiresRegisterScavenging( + const MachineFunction &MF) const { + return true; +} + +bool HSAILRegisterInfo::requiresFrameIndexScavenging( + const MachineFunction &MF) const { + // If we have spilled condition registers, we create virtual registers when + // replacing the pseudos. 
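+  // (The pseudos in question are SPILL_B1 / RESTORE_B1; their expansion in
+  // lowerSpillB1 / lowerRestoreB1 below creates a temporary GPR32 virtual
+  // register, which is what requires frame-index scavenging here.)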
+ const HSAILMachineFunctionInfo *Info = MF.getInfo(); + return Info->hasSpilledCRs(); +} + +void HSAILRegisterInfo::lowerSpillB1(MachineBasicBlock::iterator II, + int FrameIndex) const { + MachineBasicBlock *MBB = II->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineInstr &MI = *II; + const HSAILInstrInfo *TII = ST.getInstrInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + unsigned TempGPR32 = MRI.createVirtualRegister(&HSAIL::GPR32RegClass); + + DebugLoc DL = MI.getDebugLoc(); + BuildMI(*MBB, II, DL, TII->get(HSAIL::CVT_U32_B1), TempGPR32) + .addImm(0) // ftz + .addImm(0) // round + .addImm(BRIG_TYPE_U32) // destTypedestLength + .addImm(BRIG_TYPE_B1) // srcTypesrcLength + .addOperand(MI.getOperand(0)); + + MI.setDesc(TII->get(HSAIL::ST_U32)); + MI.getOperand(0).setReg(TempGPR32); + MI.getOperand(0).setIsKill(); + + MachineOperand *TypeOp = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength); + TypeOp->setImm(BRIG_TYPE_U32); +} + +void HSAILRegisterInfo::lowerRestoreB1(MachineBasicBlock::iterator II, + int FrameIndex) const { + MachineBasicBlock *MBB = II->getParent(); + MachineInstr &MI = *II; + DebugLoc DL = MI.getDebugLoc(); + unsigned DestReg = MI.getOperand(0).getReg(); + const HSAILInstrInfo *TII = ST.getInstrInfo(); + + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned TempGPR32 = MRI.createVirtualRegister(&HSAIL::GPR32RegClass); + + BuildMI(*MBB, ++II, DL, TII->get(HSAIL::CVT_B1_U32), DestReg) + .addImm(0) // ftz + .addImm(0) // round + .addImm(BRIG_TYPE_B1) // destTypedestLength + .addImm(BRIG_TYPE_U32) // srcTypesrcLength + .addReg(TempGPR32, RegState::Kill); + + MI.setDesc(TII->get(HSAIL::LD_U32)); + MI.getOperand(0).setReg(TempGPR32); + MI.getOperand(0).setIsDef(); + + MachineOperand *TypeOp = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength); + TypeOp->setImm(BRIG_TYPE_U32); +} + +bool HSAILRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { + MachineFunction *MF = MBB.getParent(); + HSAILMachineFunctionInfo *Info = MF->getInfo(); + MCContext &Ctx = MF->getContext(); + const HSAILInstrInfo *TII = ST.getInstrInfo(); + + // We only rely on the RegScavenger in rare cases for the temp registers + // needed when expanding spill_b1 / restore_b1. + assert(RC == &HSAIL::GPR32RegClass && + "Only expecting s register spills during emergencies"); + + DebugLoc DL = I->getDebugLoc(); + + // We don't really have a stack, and there's no real reason we can't create + // more stack objects. We will define a special spill variable for this case. 
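+  // The sequence emitted below is: st_u32 Reg -> %___spillScavenge at the
+  // scavenge point, followed by ld_u32 %___spillScavenge -> Reg immediately
+  // before UseMI, so the register's value survives the intervening code.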
+ Info->setHasScavengerSpill(); + + MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef("%___spillScavenge")); + BuildMI(MBB, I, DL, TII->get(HSAIL::ST_U32)) + .addReg(Reg, RegState::Kill) // src + .addSym(Sym) // address_base + .addReg(HSAIL::NoRegister) // address_reg + .addImm(0) // address_offset + .addImm(BRIG_TYPE_U32) // TypeLength + .addImm(HSAILAS::SPILL_ADDRESS) // segment + .addImm(RC->getAlignment()); // align + + BuildMI(MBB, UseMI, DL, TII->get(HSAIL::LD_U32), Reg) + .addSym(Sym) // address_base + .addReg(HSAIL::NoRegister) // address_reg + .addImm(0) // address_offset + .addImm(BRIG_TYPE_U32) // TypeLength + .addImm(HSAILAS::SPILL_ADDRESS) // segment + .addImm(RC->getAlignment()) // align + .addImm(BRIG_WIDTH_1) // width + .addImm(0); // mask + + return true; +} + +void HSAILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + MachineInstr &MI = *II; + unsigned Opcode = MI.getOpcode(); + MachineFunction *MF = MI.getParent()->getParent(); + const MachineFrameInfo *MFI = MF->getFrameInfo(); + MCContext &Ctx = MF->getContext(); + + assert(HSAIL::getNamedOperandIdx(Opcode, HSAIL::OpName::address) == + static_cast(FIOperandNum) && + "Frame index should only be used for address operands"); + + MachineOperand &Base = MI.getOperand(FIOperandNum); + int FrameIndex = Base.getIndex(); + + if (Opcode == HSAIL::SPILL_B1) + lowerSpillB1(II, FrameIndex); + else if (Opcode == HSAIL::RESTORE_B1) + lowerRestoreB1(II, FrameIndex); + + StringRef SymName = MFI->isSpillSlotObjectIndex(FrameIndex) ? + "%__spillStack" : "%__privateStack"; + MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName); + + Base.ChangeToMCSymbol(Sym); +} + +//===--------------------------------------------------------------------===// +/// Debug information queries. + +unsigned HSAILRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + // This value is unused in LLVM + return HSAIL::NoRegister; +} + +unsigned HSAILRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + if (RC == &HSAIL::GPR32RegClass) { + return HSAILReg32PressureLimit; + } + if (RC == &HSAIL::GPR64RegClass) { + return HSAILReg64PressureLimit; + } + return 0; +} + +const TargetRegisterClass * +HSAILRegisterInfo::getPhysRegClass(unsigned Reg) const { + assert(!TargetRegisterInfo::isVirtualRegister(Reg)); + + static const TargetRegisterClass *BaseClasses[] = { + &HSAIL::GPR32RegClass, &HSAIL::GPR64RegClass, &HSAIL::CRRegClass}; + + for (const TargetRegisterClass *BaseClass : BaseClasses) { + if (BaseClass->contains(Reg)) + return BaseClass; + } + return nullptr; +} + +#define GET_REGINFO_TARGET_DESC +#include "HSAILGenRegisterInfo.inc" Index: lib/Target/HSAIL/HSAILRegisterInfo.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILRegisterInfo.td @@ -0,0 +1,56 @@ +//==- HSAILRegisterInfo.td - Main HSAIL Register Definition -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the HSAIL register file, defining the registers themselves, +// aliases between the registers, and the register classes built out of the +// registers. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Register definitions... +// +class HSAILReg : Register { + let Namespace = "HSAIL"; + let DwarfNumbers = [d]; +} + + +//32-bit registers for signed, unsigned integers or float values +foreach N = 0-127 in { + def S#N : HSAILReg<"$s"#N, !add(0, N)>; +} + +//64-bit registers for signed, unsigned long integers or double float values +foreach N = 0-63 in { + def D#N : HSAILReg<"$d"#N, !add(128, N)>; +} + +//1-bit control registers +foreach N = 0-7 in { + def C#N : HSAILReg<"$c"#N, !add(192, N)>; +} + +//===----------------------------------------------------------------------===// +// HSAILReg Class Definitions... now that we have all of the pieces, define the +// top-level register classes. The order specified in the register list is +// implicitly defined to be the register allocation order. +// +def GPR32 : RegisterClass<"HSAIL", [i32, f32], 32, + (sequence "S%u", 0, 127)> { +} + +def GPR64 : RegisterClass<"HSAIL", [i64, f64], 64, + (sequence "D%u", 0, 63)> { +} + +def CR : RegisterClass<"HSAIL", [i1], 32, + (sequence "C%u", 0, 7)> { + let Size = 32; +} Index: lib/Target/HSAIL/HSAILSection.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSection.h @@ -0,0 +1,43 @@ +//===- HSAILSection.h - HSAIL-specific section representation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the HSAILSection class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILSECTION_H +#define LLVM_LIB_TARGET_HSAIL_HSAILSECTION_H + +#include "llvm/MC/MCSection.h" + +namespace llvm { + +// HSAIL does not have sections. Override this so we don't get unwanted .text +// labels emitted. +class HSAILSection : public MCSection { +public: + HSAILSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {} + virtual ~HSAILSection(); + + void PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS, + const MCExpr *Subsection) const override; + + bool UseCodeAlign() const override { + return false; + } + + bool isVirtualSection() const override { + return false; + } +}; + +} // end namespace llvm + +#endif Index: lib/Target/HSAIL/HSAILSection.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSection.cpp @@ -0,0 +1,23 @@ +//===- HSAILSection.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILSection.h" + +using namespace llvm; + + +void HSAILSection::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS, + const MCExpr *Subsection) const { + // Do nothing. 
There are no sections +} + +HSAILSection::~HSAILSection() { + +} Index: lib/Target/HSAIL/HSAILSpecial.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSpecial.td @@ -0,0 +1,128 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// special operations + +let isAsCheapAsAMove = 1 in { + let isReMaterializable = 1 in { + defm WORKITEMABSID : InstBasic_1Op_UnsignedIntTypes<"workitemabsid", BrigOpcode.WORKITEMABSID>; + def WORKGROUPID_U32 : HSAILInstBasic_1Op<"workgroupid", BrigOpcode.WORKGROUPID, Inst_U32_U32>; + def WORKITEMID_U32 : HSAILInstBasic_1Op<"workitemid", BrigOpcode.WORKITEMID, Inst_U32_U32>; + def WORKGROUPSIZE_U32 : HSAILInstBasic_1Op<"workgroupsize", BrigOpcode.WORKGROUPSIZE, Inst_U32_U32>; + def CURRENTWORKGROUPSIZE_U32 : HSAILInstBasic_1Op<"currentworkgroupsize", BrigOpcode.CURRENTWORKGROUPSIZE, Inst_U32_U32>; + def GRIDGROUPS_U32 : HSAILInstBasic_1Op<"gridgroups", BrigOpcode.GRIDGROUPS, Inst_U32_U32>; + def GRIDSIZE_U32 : HSAILInstBasic_1Op<"gridsize", BrigOpcode.GRIDSIZE, Inst_U32_U32>; + } + + def DIM_U32 : HSAILInstBasic_0Op<"dim", BrigOpcode.DIM, Inst_U32>; + def WORKITEMFLATID_U32 : HSAILInstBasic_0Op<"workitemflatid", BrigOpcode.WORKITEMFLATID, Inst_U32>; + defm WORKITEMFLATABSID : InstBasic_0Op_UnsignedIntTypes<"workitemflatabsid", BrigOpcode.WORKITEMFLATABSID>; + def LANEID_U32 : HSAILInstBasic_0Op<"laneid", BrigOpcode.LANEID, Inst_U32>; + def WAVEID_U32 : HSAILInstBasic_0Op<"waveid", BrigOpcode.WAVEID, Inst_U32>; + def MAXWAVEID_U32 : HSAILInstBasic_0Op<"maxwaveid", BrigOpcode.MAXWAVEID, Inst_U32>; + + let hasSideEffects = 1 in { + def CLOCK_U64 : HSAILInstBasic_0Op<"clock", BrigOpcode.CLOCK, Inst_U64>; + def CUID_U32 : HSAILInstBasic_0Op<"cuid", BrigOpcode.CUID, Inst_U32>; + } +} + +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; + +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; + + +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; + +let isNotDuplicable = 1, isBarrier = 1 in { + def BARRIER : HSAILInstBr_0Op_NoRet<"barrier", BrigOpcode.BARRIER> { + let WidthAttr = WidthAttrValues.ALL; + } + + def WAVEBARRIER : HSAILInstBr_0Op_NoRet<"wavebarrier", BrigOpcode.WAVEBARRIER> { + let WidthAttr = WidthAttrValues.WAVESIZE; + } +} + +def : InstBr_0Op_NoRet_Pat; +def : InstBr_0Op_NoRet_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// SYNC OCL 2.0 + +def MEMFENCE : InstMemFence<"memfence", BrigOpcode.MEMFENCE>; + +def : Pat< + (int_HSAIL_memfence (i32 imm:$order), (i32 imm:$scope)), + (MEMFENCE imm:$order, imm:$scope) +>; + +def IMAGEFENCE : HSAILInstBasic_0Op_NoRet<"imagefence", BrigOpcode.IMAGEFENCE, Inst_Void>; +def : InstBasic_0Op_NoRet_Pat; + + +defm ACTIVELANEPERMUTE : + InstLane_ActiveLanePermute_Types<"activelanepermute", + BrigOpcode.ACTIVELANEPERMUTE>; + +def : ActiveLanePermutePat; +def : ActiveLanePermutePat; + +def ACTIVELANEID_U32 : 
HSAILInstLane_0Op<"activelaneid", BrigOpcode.ACTIVELANEID, Inst_U32>; +def : ActiveLaneIdPat; + +def ACTIVELANECOUNT_U32_B1 : HSAILInstLane_1Op<"activelanecount", BrigOpcode.ACTIVELANECOUNT, Inst_U32_B1>; +def : ActiveLaneCountPat; + +let isAsCheapAsAMove = 1, isReMaterializable = 1 in { + defm NULLPTR : InstSeg_0Op_PtrTypes<"nullptr", BrigOpcode.NULLPTR>; +} + +def : InstSeg_0Op_Pat; +def : InstSeg_0Op_Pat; + +let Predicates = [LargeModel] in { + def : InstSeg_0Op_Pat; // Same as flat. + def : InstSeg_0Op_Pat; + def : InstSeg_0Op_Pat; // Same as flat. + def : InstSeg_0Op_Pat; +} + +let Predicates = [SmallModel] in { + def : InstSeg_0Op_Pat; // Same as flat. + def : InstSeg_0Op_Pat; + def : InstSeg_0Op_Pat; // Same as flat. + def : InstSeg_0Op_Pat; +} + +let isAsCheapAsAMove = 1, isReMaterializable = 1 in { + defm KERNARGBASEPTR : InstBasic_0Op_UnsignedIntTypes<"kernargbaseptr", BrigOpcode.KERNARGBASEPTR>; +} + +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; + +// Cross-Lane Operations +def ACTIVELANEMASK_V4_B64_B1 : HSAILInstLane_ActiveLaneMask<"activelanemask", BrigOpcode.ACTIVELANEMASK>; + Index: lib/Target/HSAIL/HSAILStoreInitializer.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILStoreInitializer.h @@ -0,0 +1,81 @@ +//===-- HSAILStoreInitializer.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILSTOREINITIALIZER_H +#define LLVM_LIB_TARGET_HSAIL_HSAILSTOREINITIALIZER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/EndianStream.h" + +#include + +namespace llvm { + +class AsmPrinter; +class APInt; +class Constant; +class DataLayout; +class GlobalValue; +class MCExpr; +class StringRef; +class Type; + +class StoreInitializer { +public: + // Track offset wher the the address of a global needs to be inserted. + struct VarInitOffset { + uint64_t BaseOffset; + const MCExpr *Expr; + + VarInitOffset(uint64_t Offset, const MCExpr *E) + : BaseOffset(Offset), Expr(E) {} + }; + +private: + const DataLayout &DL; + AsmPrinter &AP; + uint32_t InitEltSize; + bool IsFPElt; + + SmallString<1024> m_data; + raw_svector_ostream OS; + support::endian::Writer LE; + + std::vector VarInitAddresses; + + void initVarWithAddress(const GlobalValue *GV, StringRef Var, + const APInt &Offset); + + void printFloat(uint32_t, raw_ostream &O); + void printDouble(uint64_t, raw_ostream &O); + +public: + StoreInitializer(Type *EltTy, AsmPrinter &AP); + + void append(const Constant *CV, StringRef Var); + + ArrayRef varInitAddresses() const { + return makeArrayRef(VarInitAddresses); + } + + StringRef str() { return OS.str(); } + + size_t elementCount() { return dataSizeInBytes() / InitEltSize; } + + size_t dataSizeInBytes() { + // Be sure to flush the stream before computing the size. 
+ return OS.str().size(); + } + + void print(raw_ostream &O); +}; +} + +#endif Index: lib/Target/HSAIL/HSAILStoreInitializer.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILStoreInitializer.cpp @@ -0,0 +1,266 @@ +//===-- HSAILStoreInitializer.cpp -----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILStoreInitializer.h" + +#include "HSAILAsmPrinter.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +StoreInitializer::StoreInitializer(Type *EltTy, AsmPrinter &AP) + : DL(AP.getDataLayout()), AP(AP), InitEltSize(DL.getTypeAllocSize(EltTy)), + IsFPElt(EltTy->isFloatingPointTy()), m_data(), OS(m_data), LE(OS) {} + +void StoreInitializer::append(const Constant *CV, StringRef Var) { + switch (CV->getValueID()) { + case Value::ConstantArrayVal: { // Recursive type. + const ConstantArray *CA = cast(CV); + for (unsigned I = 0, E = CA->getNumOperands(); I < E; ++I) + append(cast(CA->getOperand(I)), Var); + + break; + } + case Value::ConstantDataArrayVal: { + const ConstantDataArray *CVE = cast(CV); + for (unsigned I = 0, E = CVE->getNumElements(); I < E; ++I) + append(cast(CVE->getElementAsConstant(I)), Var); + + break; + } + case Value::ConstantStructVal: { // Recursive type. + const ConstantStruct *S = cast(CV); + StructType *ST = S->getType(); + const StructLayout *SL = DL.getStructLayout(ST); + + uint64_t StructSize = DL.getTypeAllocSize(ST); + uint64_t BaseOffset = SL->getElementOffset(0); + + for (unsigned I = 0, E = S->getNumOperands(); I < E; ++I) { + Constant *Elt = cast(S->getOperand(I)); + append(Elt, Var); + + uint64_t EltSize = DL.getTypeAllocSize(Elt->getType()); + uint64_t EltOffset = SL->getElementOffset(I); + + uint64_t PaddedEltSize; + if (I == E - 1) + PaddedEltSize = BaseOffset + StructSize - EltOffset; + else + PaddedEltSize = SL->getElementOffset(I + 1) - EltOffset; + + // Match structure layout by padding with zeroes. + while (EltSize < PaddedEltSize) { + LE.write(static_cast(0)); + ++EltSize; + } + } + break; + } + case Value::ConstantVectorVal: { // Almost leaf type. 
+ const ConstantVector *CVE = cast(CV); + VectorType *Ty = CVE->getType(); + Type *EltTy = Ty->getElementType(); + unsigned NElts = Ty->getNumElements(); + unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy); + + unsigned I; + for (I = 0; I < NElts; ++I) + append(cast(CVE->getOperand(I)), Var); + + Constant *Zero = Constant::getNullValue(EltTy); + while (I < RealNElts) { + append(Zero, Var); + ++I; + } + + break; + } + case Value::ConstantDataVectorVal: { + const ConstantDataVector *CVE = cast(CV); + VectorType *Ty = CVE->getType(); + Type *EltTy = Ty->getElementType(); + unsigned NElts = Ty->getNumElements(); + unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy); + + unsigned I; + for (I = 0; I < NElts; ++I) + append(cast(CVE->getElementAsConstant(I)), Var); + + Constant *Zero = Constant::getNullValue(EltTy); + while (I < RealNElts) { + append(Zero, Var); + ++I; + } + + break; + } + case Value::ConstantIntVal: { + const ConstantInt *CI = cast(CV); + if (CI->getType()->isIntegerTy(1)) { + LE.write(static_cast(CI->getZExtValue() ? 1 : 0)); + } else { + switch (CI->getBitWidth()) { + case 8: + LE.write(static_cast(CI->getZExtValue())); + break; + case 16: + LE.write(static_cast(CI->getZExtValue())); + break; + case 32: + LE.write(static_cast(CI->getZExtValue())); + break; + case 64: + LE.write(static_cast(CI->getZExtValue())); + break; + } + } + break; + } + case Value::ConstantFPVal: { + const ConstantFP *CFP = cast(CV); + if (CFP->getType()->isFloatTy()) + LE.write(CFP->getValueAPF().convertToFloat()); + else if (CFP->getType()->isDoubleTy()) + LE.write(CFP->getValueAPF().convertToDouble()); + else + llvm_unreachable("unhandled ConstantFP type"); + break; + } + case Value::ConstantPointerNullVal: { + unsigned AS = CV->getType()->getPointerAddressSpace(); + if (DL.getPointerSize(AS) == 8) + LE.write(static_cast(0)); + else + LE.write(static_cast(0)); + break; + } + case Value::UndefValueVal: + case Value::ConstantAggregateZeroVal: { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); + for (uint64_t I = 0; I < Size / InitEltSize; ++I) { + switch (InitEltSize) { + case 1: + LE.write(static_cast(0)); + break; + case 2: + LE.write(static_cast(0)); + break; + case 4: + LE.write(static_cast(0)); + break; + case 8: + LE.write(static_cast(0)); + break; + default: + llvm_unreachable("unhandled size"); + } + } + + break; + } + case Value::GlobalVariableVal: + case Value::ConstantExprVal: { + const MCExpr *Expr = AP.lowerConstant(CV); + + // Offset that address needs to be written at is the current size of the + // buffer. 
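+    // Illustrative example (not from the patch): for a 64-bit pointer
+    // initializer such as the address of some global @g, eight zero bytes are
+    // written below as a placeholder, and {CurrOffset, lowered expression} is
+    // recorded in VarInitAddresses so the caller can emit the real address
+    // expression when it prints the variable.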
+ uint64_t CurrOffset = dataSizeInBytes(); + + unsigned Size = DL.getTypeAllocSize(CV->getType()); + switch (Size) { + case 4: + LE.write(static_cast(0)); + break; + case 8: + LE.write(static_cast(0)); + break; + default: + llvm_unreachable("unhandled size"); + } + + VarInitAddresses.emplace_back(CurrOffset, Expr); + break; + } + default: + llvm_unreachable("unhandled initializer"); + } +} + +// FIXME: Duplicated in HSAILAsmPrinter +void StoreInitializer::printFloat(uint32_t Val, raw_ostream &O) { + O << format("0F%08" PRIx32, Val); +} + +void StoreInitializer::printDouble(uint64_t Val, raw_ostream &O) { + O << format("0D%016" PRIx64, Val); +} + +void StoreInitializer::print(raw_ostream &O) { + StringRef Str = str(); + assert(Str.size() % InitEltSize == 0); + + if (InitEltSize == 1) { + for (size_t I = 0, E = Str.size(); I != E; ++I) { + if (I != 0) + O << ", "; + + O << (static_cast(Str[I]) & 0xff); + } + + return; + } + + for (unsigned I = 0, E = Str.size(); I != E; I += InitEltSize) { + if (I != 0) + O << ", "; + + const char *Ptr = &Str.data()[I]; + switch (InitEltSize) { + case 4: { + uint32_t Tmp; + std::memcpy(&Tmp, Ptr, 4); + + if (IsFPElt) + printFloat(Tmp, O); + else + O << Tmp; + break; + } + case 8: { + uint64_t Tmp; + std::memcpy(&Tmp, Ptr, 8); + + if (IsFPElt) + printDouble(Tmp, O); + else + O << Tmp; + break; + } + case 2: { + uint16_t Tmp; + std::memcpy(&Tmp, Ptr, 2); + + assert(!IsFPElt && "half not implemented"); + O << Tmp; + break; + } + default: + llvm_unreachable("Unhandled element size"); + } + } +} Index: lib/Target/HSAIL/HSAILSubtarget.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSubtarget.h @@ -0,0 +1,173 @@ +//===---- HSAILSubtarget.h - Define Subtarget for the HSAIL -----*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the HSAIL specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_SUBTARGET_H_ +#define _HSAIL_SUBTARGET_H_ + +#include "HSAIL.h" +#include "HSAILFrameLowering.h" +#include "HSAILInstrInfo.h" +#include "HSAILISelLowering.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "HSAILGenSubtargetInfo.inc" + +namespace llvm { +class HSAILTargetMachine; + +class HSAILSamplerHandle { +private: + std::string mSym; + unsigned int mVal; + bool mIsRO; + bool mEmitted; + +public: + HSAILSamplerHandle(HSAILSamplerHandle ©) { + mSym = copy.mSym; + mVal = copy.mVal; + mIsRO = copy.mIsRO; + mEmitted = copy.mEmitted; + } + + HSAILSamplerHandle(/*bool isImage, */ const char *sym) { + mSym = sym; + mVal = 0; + mIsRO = false; + mEmitted = false; + } + + HSAILSamplerHandle(/*bool isImage, */ unsigned int u) { + mVal = u; + mIsRO = false; + mEmitted = false; + } + + inline void setSym(std::string str) { mSym = str; } + inline std::string getSym() { return mSym; } + inline unsigned int getVal() { return mVal; } + inline bool isRO() { return mIsRO; } + inline void setRO() { mIsRO = true; } + inline bool isEmitted() { return mEmitted; } + inline void setEmitted() { mEmitted = true; } +}; + +class HSAILImageHandles { +private: + // Image and sampler kernel args + SmallVector HSAILImageArgs; + + // Sampler initializers + SmallVector HSAILSamplers; + unsigned index; + +public: + HSAILImageHandles() { index = 0; } + // TODO_HSA Add a destructor + + SmallVector getSamplerHandles() { + return HSAILSamplers; + } + HSAILSamplerHandle *getSamplerHandle(unsigned index); + + unsigned findOrCreateImageHandle(const char *sym); + unsigned findOrCreateSamplerHandle(unsigned int u); + + std::string getImageSymbol(unsigned index); + std::string getSamplerSymbol(unsigned index); + unsigned getSamplerValue(unsigned index); + + bool isSamplerSym(std::string sym); + + void finalize(); + void clearImageArgs(); +}; + +class HSAILKernelManager; + +class HSAILSubtarget : public HSAILGenSubtargetInfo { +protected: + Triple TargetTriple; + std::string DevName; + bool IsLargeModel; + bool HasImages; + bool IsGCN; + + HSAILFrameLowering FrameLowering; + std::unique_ptr TLInfo; + std::unique_ptr InstrInfo; + + // FIXME: It makes no sense for this to be here. + HSAILImageHandles *imageHandles; + +public: + /// This constructor initializes the data members to match that + /// of the specified triple. 
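+  /// For example (illustrative): a triple of hsail64-pc-amdopencl selects the
+  /// large (64-bit pointer) machine model, while plain hsail selects the small
+  /// model; see how IsLargeModel is derived in the constructor definition.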
+ /// + + HSAILSubtarget(StringRef TT, StringRef CPU, StringRef FS, + HSAILTargetMachine &TM); + HSAILSubtarget &initializeSubtargetDependencies(StringRef GPU, StringRef FS); + + const HSAILRegisterInfo *getRegisterInfo() const override { + return &getInstrInfo()->getRegisterInfo(); + } + + const HSAILInstrInfo *getInstrInfo() const override { + return InstrInfo.get(); + } + + const HSAILTargetLowering *getTargetLowering() const override { + return TLInfo.get(); + } + + const HSAILFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + bool isFullProfile() const { + return true; + } + + bool isLargeModel() const { + return IsLargeModel; + } + + bool isSmallModel() const { + return !IsLargeModel; + } + + bool hasImages() const { + return HasImages; + } + + bool isGCN() const { + return IsGCN; + } + + // FIXME: Remove this + bool supportMetadata30() const { return true; } + + StringRef getDeviceName() const { return DevName; } + + HSAILImageHandles *getImageHandles() const { return imageHandles; } +}; + +} // End llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILSubtarget.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSubtarget.cpp @@ -0,0 +1,117 @@ +//===------ HSAILSubtarget.cpp - Define Subtarget for the HSAIL -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the HSAIL specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#include "HSAILSubtarget.h" + +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "subtarget" + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "HSAILGenSubtargetInfo.inc" + +using namespace llvm; + +HSAILSubtarget::HSAILSubtarget(StringRef TT, StringRef CPU, StringRef FS, + HSAILTargetMachine &TM) + : HSAILGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), + DevName(CPU.empty() ? "generic" : CPU.str()), + IsLargeModel(TargetTriple.getArch() == Triple::hsail64), + HasImages(false), + IsGCN(false), + FrameLowering(TargetFrameLowering::StackGrowsUp, 16, 0), TLInfo(), + InstrInfo(), imageHandles(new HSAILImageHandles()) { + initializeSubtargetDependencies(CPU, FS); + + InstrInfo.reset(new HSAILInstrInfo(*this)); + TLInfo.reset(new HSAILTargetLowering(TM, *this)); +} + +HSAILSubtarget &HSAILSubtarget::initializeSubtargetDependencies(StringRef GPU, + StringRef FS) { + ParseSubtargetFeatures(GPU, FS); + return *this; +} + +// +// Support for processing Image and Sampler kernel args and operands. 
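+//
+// Rough usage sketch (illustrative only; the surrounding driver code is
+// hypothetical):
+//
+//   HSAILImageHandles &IH = *Subtarget.getImageHandles();
+//   unsigned Idx = IH.findOrCreateSamplerHandle(0x11);
+//   HSAILSamplerHandle *H = IH.getSamplerHandle(Idx);
+//   if (!H->isEmitted()) {
+//     // ... emit the sampler initializer for H->getSym() ...
+//     H->setEmitted();
+//   }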
+// +unsigned HSAILImageHandles::findOrCreateImageHandle(const char *sym) { + // Check for image arg with same value already present + std::string symStr = sym; + for (unsigned i = 0; i < HSAILImageArgs.size(); i++) { + if (HSAILImageArgs[i] == symStr) { + return i; + } + } + HSAILImageArgs.push_back(symStr); + return HSAILImageArgs.size() - 1; +} + +unsigned HSAILImageHandles::findOrCreateSamplerHandle(unsigned int u) { + // Check for handle with same value already present + for (unsigned i = 0; i < HSAILSamplers.size(); i++) { + if (getSamplerValue(i) == u) { + return i; + } + } + HSAILSamplerHandle *handle = new HSAILSamplerHandle(u); + HSAILSamplers.push_back(handle); + return HSAILSamplers.size() - 1; +} + +HSAILSamplerHandle *HSAILImageHandles::getSamplerHandle(unsigned index) { + assert(index < HSAILSamplers.size() && "Invalid sampler index"); + return HSAILSamplers[index]; +} + +std::string HSAILImageHandles::getImageSymbol(unsigned index) { + assert(index < HSAILImageArgs.size() && "Invalid image arg index"); + return HSAILImageArgs[index]; +} + +std::string HSAILImageHandles::getSamplerSymbol(unsigned index) { + assert(index < HSAILSamplers.size() && "Invalid sampler index"); + return HSAILSamplers[index]->getSym(); +} + +unsigned HSAILImageHandles::getSamplerValue(unsigned index) { + assert(index < HSAILSamplers.size() && "Invalid sampler index"); + return HSAILSamplers[index]->getVal(); +} + +bool HSAILImageHandles::isSamplerSym(std::string sym) { + for (unsigned i = 0; i < HSAILSamplers.size(); i++) { + if (getSamplerSymbol(i) == sym) { + return true; + } + } + return false; +} + +void HSAILImageHandles::finalize() { + // printf("ImageHandles before finalize\n"); + // dump(); + char buf[16]; + for (unsigned i = 0; i < HSAILSamplers.size(); i++) { + if (getSamplerSymbol(i).empty()) { + sprintf(buf, "%s%u", "__Samp", index); + HSAILSamplers[i]->setSym(buf); + index++; + } + } +} + +void HSAILImageHandles::clearImageArgs() { HSAILImageArgs.clear(); } Index: lib/Target/HSAIL/HSAILTargetMachine.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILTargetMachine.h @@ -0,0 +1,106 @@ +//=-- HSAILTargetMachine.h - Define TargetMachine for the HSAIL ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the HSAIL specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_TARGET_MACHINE_H_ +#define _HSAIL_TARGET_MACHINE_H_ + +#include "HSAILIntrinsicInfo.h" +#include "HSAILSubtarget.h" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class HSAILTargetMachine : public LLVMTargetMachine { +private: + HSAILSubtarget Subtarget; + HSAILIntrinsicInfo IntrinsicInfo; + TargetLoweringObjectFile *TLOF; + +public: + class HSAILSelectionDAGInfo : public TargetSelectionDAGInfo { + public: + explicit HSAILSelectionDAGInfo(const HSAILTargetMachine &TM) + : TargetSelectionDAGInfo(TM.getDataLayout()) {} + }; + +public: + HSAILTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL); + + const HSAILIntrinsicInfo *getIntrinsicInfo() const override { + return &IntrinsicInfo; + } + + const HSAILSubtarget *getSubtargetImpl() const { + return &Subtarget; + } + + const HSAILSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + + TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF; } + + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + CodeGenFileType HSAILFileType; + +public: + bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, + CodeGenFileType FT, bool DisableVerify = true, + AnalysisID StartAfter = 0, + AnalysisID StopAfter = 0) override; +}; + +class HSAIL_32TargetMachine : public HSAILTargetMachine { + HSAILSelectionDAGInfo TSInfo; + +public: + HSAIL_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + void dump(raw_ostream &O); +}; + +class HSAIL_64TargetMachine : public HSAILTargetMachine { + HSAILSelectionDAGInfo TSInfo; + +public: + HSAIL_64TargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); +}; + +class HSAILPassConfig : public TargetPassConfig { +public: + HSAILPassConfig(HSAILTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + // Pass Pipeline Configuration + void addIRPasses() override; + void addPreEmitPass() override; + bool addPreISel() override; + bool addInstSelector() override; + void addPreRegAlloc() override; + void addPostRegAlloc() override; +}; +} // End llvm namespace +#endif Index: lib/Target/HSAIL/HSAILTargetMachine.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILTargetMachine.cpp @@ -0,0 +1,206 @@ +//===-- HSAILTargetMachine.cpp - Define TargetMachine for the HSAIL -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the HSAIL specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#include "HSAILTargetMachine.h" +#include "HSAILELFTargetObjectFile.h" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +#if HSAIL_USE_LIBHSAIL +static cl::opt UseStandardAsmPrinter( + "hsail-asmprinter", + cl::desc("Use standard LLVM AsmPrinter instead of BRIGAsmPrinter"), + cl::init(false)); +#else +static const bool UseStandardAsmPrinter = true; +#endif + +extern "C" void LLVMInitializeHSAILTarget() { + // Register the target. + RegisterTargetMachine X(TheHSAIL_32Target); + RegisterTargetMachine Y(TheHSAIL_64Target); +} + +extern "C" void LLVMInitializeBRIGAsmPrinter(); + +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (UseStandardAsmPrinter) + return new HSAILTargetObjectFile(); + + if (TT.getArch() == Triple::hsail64) + return new BRIG64_DwarfTargetObjectFile(); + return new BRIG32_DwarfTargetObjectFile(); +} + +static StringRef computeDataLayout(const Triple &T) { + if (T.getArch() == Triple::hsail64) { + return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" + "-p6:32:32-p7:64:64-p8:32:32-p9:64:64-i1:8:8-i8:8:8" + "-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n32:64"; + } + + return "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n32:64"; +} + +// Hack to prevent weird standard OS directives from being printed when the +// triple is not fully specified. e.g. on a OS X host, there is no other way to +// disable printing .macosx_version_min at the start of the module. +LLVM_READONLY +static Triple getTripleNoOS(StringRef Str) { + Triple TT(Str); + if (TT.getOS() != Triple::UnknownOS) + TT.setOS(Triple::UnknownOS); + + return TT; +} + +HSAILTargetMachine::HSAILTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, computeDataLayout(getTripleNoOS(TT)), + getTripleNoOS(TT).str(), CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, *this), IntrinsicInfo(this), + TLOF(createTLOF(Triple(getTargetTriple()))) { + initAsmInfo(); + +#if HSAIL_USE_LIBHSAIL + // FIXME: Hack to enable command line switch to switch between + // BRIGAsmPrinter and HSAILAsmPrinter. Override the default registered + // AsmPrinter to use the BRIGAsmPrinter. + if (!UseStandardAsmPrinter) + LLVMInitializeBRIGAsmPrinter(); +#endif +} + +bool HSAILTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FT, + bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + HSAILFileType = FT; // FIXME: Remove this. + + if (!UseStandardAsmPrinter) { + // Use CGFT_ObjectFile regardless on the output format. + // To process CGFT_AssemblyFile we will later disassemble generated BRIG. 
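+    // For example (illustrative): a client that requested CGFT_AssemblyFile
+    // still runs the BRIG object pipeline below; the textual HSAIL it receives
+    // is produced afterwards by disassembling the emitted BRIG container
+    // rather than by a separate assembly printer.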
+ FT = CGFT_ObjectFile; + } + + return LLVMTargetMachine::addPassesToEmitFile(PM, Out, FT, DisableVerify, + StartAfter, StopAfter); +} + +TargetPassConfig *HSAILTargetMachine::createPassConfig(PassManagerBase &PM) { + return new HSAILPassConfig(this, PM); +} + +void HSAILPassConfig::addIRPasses() { + addPass(createHSAILAlwaysInlinePass()); + addPass(createAlwaysInlinerPass()); + + // AddrSpaceCast optimization and lowering. Add dead code elimination + // to eliminate dead instructions (AddrSpaceCast, etc.). + TargetPassConfig::addIRPasses(); +} + +bool HSAILPassConfig::addPreISel() { + addPass(createLCSSAPass()); // Required by early CFG opts + + return true; +} + +bool HSAILPassConfig::addInstSelector() { + HSAILTargetMachine &HSATM = getTM(); + // return + // HSAILTargetMachine::addInstSelector(*PM,HSATM.Options,HSATM.getOptLevel()); + // mOptLevel = OptLevel; + // Install an instruction selector. + + addPass(createHSAILISelDag(HSATM)); + +#if 0 + addPass(&DeadMachineInstructionElimID); + if (EnableUniformOps) { + addPass(createHSAILUniformOperations(HSATM)); + } +#endif + + return false; +} +void HSAILPassConfig::addPreEmitPass() { } +void HSAILPassConfig::addPreRegAlloc() { } +void HSAILPassConfig::addPostRegAlloc() { } + +//===----------------------------------------------------------------------===// +// HSAIL_32Machine functions +//===----------------------------------------------------------------------===// +HSAIL_32TargetMachine::HSAIL_32TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : HSAILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), TSInfo(*this) { + Triple TheTriple(TT); + + // Check for mismatch in target triple settings and data layout. Note the + // target + // triple comes from the module (unless overridden on command line). It's just + // a + // warning, but users should know if they're specifying --march=hsail-64 on a + // 32-bit module or --march=hsail on a 64-bit module. + if (TheTriple.getArch() == Triple::hsail64) { + errs() + << "warning: target triple '" << TT + << "' does not match target 'hsail', expecting hsail-pc-amdopencl.\n"; + } +} + +//===----------------------------------------------------------------------===// +// HSAIL_64Machine functions +//===----------------------------------------------------------------------===// +HSAIL_64TargetMachine::HSAIL_64TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : HSAILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), TSInfo(*this) { + Triple TheTriple(TT); + + // Check for mismatch in target triple settings and data layout. Note the + // target + // triple comes from the module (unless overridden on command line). It's just + // a + // warning, but users should know if they're specifying --march=hsail-64 on a + // 32-bit module. + if (TheTriple.getArch() == Triple::hsail) { + errs() << "warning: target triple '" << TT << "' does not match target " + "'hsail-64', expecting " + "hsail64-pc-amdopencl.\n"; + } +} Index: lib/Target/HSAIL/HSAILUtil/CMakeLists.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILUtil/CMakeLists.txt @@ -0,0 +1,2 @@ +include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/..) 
+add_llvm_library(LLVMHSAILUtil HSAILUtil.cpp) Index: lib/Target/HSAIL/HSAILUtil/HSAILUtil.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILUtil/HSAILUtil.cpp @@ -0,0 +1,18 @@ +//===-- HSAILUtil.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is just to split getNamedOperandIdx out from the rest of the target +// library to avoid the HSAILAsmPrinter library depending on it. + +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include "../MCTargetDesc/HSAILMCTargetDesc.h" + +#define GET_INSTRINFO_NAMED_OPS +#include "HSAILGenInstrInfo.inc" Index: lib/Target/HSAIL/HSAILUtilityFunctions.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILUtilityFunctions.h @@ -0,0 +1,62 @@ +//===-- HSAILUtilityFunctions.h - Utility Functions Header ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file provides declarations for functions that are used across different +// classes and provide various conversions or utility to shorten the code +// +//===----------------------------------------------------------------------===// + +#ifndef HSAILUTILITYFUNCTIONS_H_ +#define HSAILUTILITYFUNCTIONS_H_ + +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILTargetMachine.h" + +namespace llvm { +class GlobalVariable; +class HSAILMachineFunctionInfo; +class MachineInstr; +class TargetRegisterClass; +class Type; + +namespace HSAIL { + +BrigType getBrigType(Type *Ty, const DataLayout &DL, bool Signed = false); + +/// Returns the type to use when expressing the type in HSAIL. If this will be +/// expressed as an HSAIL array, set NElts to the number of elements, +/// otherwise 0. +Type *analyzeType(Type *Ty, unsigned &NElts, const DataLayout &DL); + +unsigned getAlignTypeQualifier(Type *ty, const DataLayout &DL, + bool isPreferred); + +static inline bool isConv(const MachineInstr *MI) { + return MI->getDesc().TSFlags & HSAILInstrFlags::IS_CONV; +} + +static inline bool isImageInst(const MachineInstr *MI) { + return MI->getDesc().TSFlags & HSAILInstrFlags::IS_IMAGEINST; +} + +bool isKernelFunc(const Function *F); +bool isSPIRModule(const Module &M); + +bool notUsedInKernel(const GlobalVariable *GV); +bool isIgnoredGV(const GlobalVariable *GV); + +bool sanitizedGlobalValueName(StringRef, SmallVectorImpl &); +bool sanitizeGlobalValueName(llvm::GlobalValue *GV); + +} // End namespace HSAIL + +} // End namespace llvm + +#endif // HSAILUTILITYFUNCTIONS_H_ Index: lib/Target/HSAIL/HSAILUtilityFunctions.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILUtilityFunctions.cpp @@ -0,0 +1,330 @@ +//===-- HSAILUtilityFunctions.cpp - HSAIL Utility Functions ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "HSAILUtilityFunctions.h" +#include "HSAILInstrInfo.h" +#include "HSAILISelLowering.h" +#include "HSAILMachineFunctionInfo.h" +#include "HSAILOpaqueTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Debug.h" +#include +#include +#include + +using namespace llvm; + +namespace llvm { + +namespace HSAIL { + +BrigType getBrigType(Type *type, const DataLayout &DL, bool Signed) { + switch (type->getTypeID()) { + case Type::VoidTyID: + return BRIG_TYPE_NONE; // TODO_HSA: FIXME: void + case Type::FloatTyID: + return BRIG_TYPE_F32; + case Type::DoubleTyID: + return BRIG_TYPE_F64; + case Type::IntegerTyID: + if (type->isIntegerTy(8)) { + return Signed ? BRIG_TYPE_S8 : BRIG_TYPE_U8; + } else if (type->isIntegerTy(16)) { + return Signed ? BRIG_TYPE_S16 : BRIG_TYPE_U16; + } else if (type->isIntegerTy(32)) { + return Signed ? BRIG_TYPE_S32 : BRIG_TYPE_U32; + } else if (type->isIntegerTy(64)) { + return Signed ? BRIG_TYPE_S64 : BRIG_TYPE_U64; + } else if (type->isIntegerTy(1)) { + return BRIG_TYPE_B1; + } else + llvm_unreachable("Unhandled type"); + break; + case Type::PointerTyID: { + if (OpaqueType OT = GetOpaqueType(type)) { + if (IsImage(OT)) + return BRIG_TYPE_RWIMG; + if (OT == Sampler) + return BRIG_TYPE_SAMP; + } + unsigned AS = cast(type)->getAddressSpace(); + return DL.getPointerSize(AS) == 8 ? BRIG_TYPE_U64 : BRIG_TYPE_U32; + } + case Type::StructTyID: + // Treat struct as array of bytes. + return BRIG_TYPE_U8_ARRAY; + case Type::VectorTyID: + return static_cast( + getBrigType(type->getScalarType(), DL, Signed) | BRIG_TYPE_ARRAY); + case Type::ArrayTyID: + return static_cast( + getBrigType(cast(type)->getElementType(), DL, Signed) | + BRIG_TYPE_ARRAY); + default: + type->dump(); + llvm_unreachable("Unhandled type"); + } +} + +Type *analyzeType(Type *Ty, unsigned &NElts, const DataLayout &DL) { + // Scan through levels of nested arrays until we get to something that can't + // be expressed as a simple array element. + if (ArrayType *AT = dyn_cast(Ty)) { + Type *EltTy; + NElts = 1; + + while (AT) { + NElts *= AT->getNumElements(); + EltTy = AT->getElementType(); + AT = dyn_cast(EltTy); + } + + unsigned EltElts = ~0u; + + // We could have arrays of vectors or structs. + Type *Tmp = analyzeType(EltTy, EltElts, DL); + + // We only need to multiply if this was a nested vector type. + if (EltElts != 0) + NElts *= EltElts; + + return Tmp; + } + + if (VectorType *VT = dyn_cast(Ty)) { + Type *EltTy = VT->getElementType(); + + // We need to correct the number of elements in the case of 3x vectors since + // in memory they occupy 4 elements. + NElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy); + assert(NElts >= VT->getNumElements()); + + // FIXME: It's not clear what the behavior of these is supposed to be and + // aren't consistently handled. 
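+    // Worked example for the 3x correction above (illustrative): for a
+    // <3 x float> value the alloc size under this data layout is 16 bytes and
+    // the element alloc size is 4, so NElts becomes 4 even though the IR
+    // vector only has 3 elements.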
+ if (EltTy->isIntegerTy(1)) + report_fatal_error("i1 vector initializers not handled"); + + return EltTy; + } + + if (isa(Ty)) { + NElts = DL.getTypeAllocSize(Ty); + return Type::getInt8Ty(Ty->getContext()); + } + + assert(!Ty->isAggregateType()); + + NElts = 0; + + // Arrays of i1 are not supported, and must be replaced with byte sized + // elements. + if (Ty->isIntegerTy(1)) + return Type::getInt8Ty(Ty->getContext()); + + return Ty; +} + +unsigned getAlignTypeQualifier(Type *ty, const DataLayout &DL, + bool isPreferred) { + unsigned align = 0; + + if (ArrayType *ATy = dyn_cast(ty)) + ty = ATy->getElementType(); + + if (IsImage(ty) || IsSampler(ty)) + return 8; + + align = + isPreferred ? DL.getPrefTypeAlignment(ty) : DL.getABITypeAlignment(ty); + + unsigned max_align = (1 << (BRIG_ALIGNMENT_MAX - BRIG_ALIGNMENT_1)); + if (align > max_align) + align = max_align; + + assert(align && (align & (align - 1)) == 0); + + return align; +} + +static bool isKernelFunc(StringRef str) { + if (str.startswith("__OpenCL_") && str.endswith("_kernel")) + return true; + return false; +} + +bool isKernelFunc(const Function *F) { + if (CallingConv::SPIR_KERNEL == F->getCallingConv()) + return true; + + return isKernelFunc(F->getName()); +} + +/// \brief Check if a global variable is used in any "real" code. +/// +/// We iterate over the entire tree of users, looking for any use in +/// the kernel code. The traversal ignores any use in metadata. There +/// is only one way to use a global variable in metadata --- by using +/// it in a global variable that occurs in the "llvm.metadata" +/// section. (MDNode is not a subclass of llvm::User, and hence they +/// can't occur in the user tree.) The traversal returns early if the +/// user is an instruction. +/// +/// Assumption: Instructions do not occur in metadata. Also, we don't +/// worry about dead code so late in the flow. +bool notUsedInKernel(const GlobalVariable *GV) { + SmallVector worklist; // arbitrary choice of 32 + + // We only inspect the users of GV, hence GV itself is never + // inserted in the worklist. + worklist.append(GV->user_begin(), GV->user_end()); + + while (!worklist.empty()) { + const User *user = worklist.pop_back_val(); + + if (const GlobalValue *GUser = dyn_cast(user)) { + if (std::string("llvm.metadata") == GUser->getSection()) + continue; + } + + if (isa(user)) + return false; + + worklist.append(user->user_begin(), user->user_end()); + } + + return true; +} + +bool sanitizedGlobalValueName(StringRef Name, SmallVectorImpl &Out) { + // Poor man's regexp check. + static const StringRef Syntax("abcdefghijklmnopqrstuvwxyz" + "_." + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789"); + + static const StringRef FirstCharSyntax("abcdefghijklmnopqrstuvwxyz" + "_" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + + static const StringRef Digits("0123456789"); + + SmallString<32> NewName; + + // The second character (after the prefix) of an identifier must must be a + // letter or underscore. + if (FirstCharSyntax.find(Name[0]) == StringRef::npos) { + NewName += '_'; + Name = Name.drop_front(1); + } + + size_t p = 0; + size_t q = 0; + + while (q != StringRef::npos) { + q = Name.find_first_not_of(Syntax, p); + // If q == p, the character at p itself violates the syntax. + if (q != p) { + // Consume everything before q, not including q (even if q == npos). + NewName += Name.slice(p, q); + } + + // If not found, do not replace. + if (q == StringRef::npos) + break; + + // Replace found character with underscore. 
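+    // For example (hypothetical input): with Name == "my$var" the '$' found at
+    // q is skipped and the underscore appended below takes its place, so
+    // NewName ends up as "my_var"; since that differs from the original name,
+    // the function returns true and hands back the sanitized spelling.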
+ NewName += '_'; + + // Then we directly move on to the next character: skip q. + p = q + 1; + } + + // opt may generate empty names and names started with digit. + if (Name.empty() || Digits.find(Name[0]) != StringRef::npos || + !Name.equals(NewName)) { + // Add prefix to show that the name was replaced by HSA. + // LLVM's setName adds seq num in case of name duplicating. + Out.append(NewName.begin(), NewName.end()); + return true; + } + + return false; +} + +/// \brief Rename a global variable to satisfy HSAIL syntax. +/// +/// We simply drop all characters from the name that are disallowed by +/// HSAIL. When the resulting string is applied as a name, it will be +/// automatically modified to resolve conflicts. +bool sanitizeGlobalValueName(GlobalValue *GV) { + SmallString<256> NewName; + + if (sanitizedGlobalValueName(GV->getName(), NewName)) { + // Add prefix to show that the name was replaced by HSA. + // LLVM's setName adds seq num in case of name duplicating. + GV->setName(Twine("__hsa_replaced_") + Twine(NewName)); + return true; + } + + return false; +} + +bool isIgnoredGV(const GlobalVariable *GV) { + unsigned AS = GV->getType()->getAddressSpace(); + + if (AS == HSAILAS::PRIVATE_ADDRESS || AS == HSAILAS::GROUP_ADDRESS) + return true; + + if (GV->hasLocalLinkage() && notUsedInKernel(GV)) + return true; + + StringRef GVname = GV->getName(); + + // FIXME: Should be removed + return GVname.startswith("sgv") || GVname.startswith("fgv") || + GVname.startswith("lvgv") || GVname.startswith("pvgv") || + // TODO_HSA: suppress emitting annotations as global declarations for + // now. These are labelled as "llvm.metadata". How should we handle + // these? + GVname.startswith("llvm.argtypeconst.annotations") || + GVname.startswith("llvm.argtypename.annotations") || + GVname.startswith("llvm.constpointer.annotations") || + GVname.startswith("llvm.global.annotations") || + GVname.startswith("llvm.image.annotations") || + GVname.startswith("llvm.readonlypointer.annotations") || + GVname.startswith("llvm.restrictpointer.annotations") || + GVname.startswith("llvm.signedOrSignedpointee.annotations") || + GVname.startswith("llvm.volatilepointer.annotations") || + GVname.startswith("llvm.sampler.annotations"); +} + +/// \brief Check whether the module contains SPIR +/// +/// We cannot use metadata such as "opencl.spir.version", or the +/// target triple, because these can come in via the builtins library +/// as well. Instead we rely on the fact that there are no kernels in +/// the builtins library, and hence "opencl.kernels" is absent. +bool isSPIRModule(const Module &M) { + return M.getNamedMetadata("opencl.kernels"); +} + +} // End namespace HSAIL + +} // End namespace llvm Index: lib/Target/HSAIL/InstPrinter/CMakeLists.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/InstPrinter/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMHSAILAsmPrinter + HSAILInstPrinter.cpp + ) +target_link_libraries(LLVMHSAILAsmPrinter PRIVATE LLVMHSAILUtil) \ No newline at end of file Index: lib/Target/HSAIL/InstPrinter/HSAILInstPrinter.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/InstPrinter/HSAILInstPrinter.h @@ -0,0 +1,170 @@ +//===-- HSAILInstPrinter.h - HSAIL MC Inst -> ASM interface -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_INSTPRINTER_HSAILINSTPRINTER_H +#define LLVM_LIB_TARGET_HSAIL_INSTPRINTER_HSAILINSTPRINTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class HSAILInstPrinter : public MCInstPrinter { +private: + bool InArgScope; + +public: + HSAILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI); + + // Autogenerated by tblgen + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + +private: + // void printU8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printB1(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printB8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printB16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printB32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printB64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printB128(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printSAMP(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printROIMG(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printWOIMG(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printRWIMG(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printSIG32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printSIG64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU8X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU8X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU8X16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU16X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU16X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU16X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU32X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU32X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU64X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS8X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS8X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS8X16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS16X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS32X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS32X4(const MCInst 
*MI, unsigned OpNo, raw_ostream &O); + // void printS64X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF16X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF16X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printF16X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF32X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printF32X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printF64X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printUImmediate(uint64_t Imm, raw_ostream &O); + void printSImmediate(int64_t Imm, raw_ostream &O); + void printAddrMode3Op(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printVec2Op(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printVec3Op(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printVec4Op(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printV2U32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV2F32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV2U64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV2F64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printV3U32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV3F32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV3U64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV3F64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printV4U32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV4F32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV4U64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV4F64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printFTZ(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printNoNull(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigAlignment(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printArgDeclAlignment(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printArraySize(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printEquiv(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigAllocation(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigAluModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigAtomicOperation(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigCompareOperation(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigControlDirective(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + + void printBrigExecutableModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigImageChannelOrder(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigImageChannelType(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigImageGeometry(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigImageQuery(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigLinkage(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigMachineModel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigMemoryModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigMemoryOrder(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigMemoryScope(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void 
printBrigPack(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigProfile(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigRound(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigSamplerAddressing(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigSamplerCoordNormalization(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + + void printBrigSamplerFilter(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigSamplerQuery(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigSegCvtModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + + void printBrigSegment(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigType(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigVariableModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + + void printBrigWidth(const MCInst *MI, unsigned OpNo, raw_ostream &O); +}; + +} // End namespace llvm + +#endif Index: lib/Target/HSAIL/InstPrinter/HSAILInstPrinter.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/InstPrinter/HSAILInstPrinter.cpp @@ -0,0 +1,1729 @@ +//===-- HSAILInstPrinter.cpp - HSAIL MC Inst -> ASM -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#include "HSAILInstPrinter.h" +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILInstrInfo.h" + +#include "MCTargetDesc/HSAILMCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +HSAILInstPrinter::HSAILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI), + InArgScope(false) {} + +void HSAILInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot, const MCSubtargetInfo &STI) { + + if (MI->getOpcode() == HSAIL::ARG_SCOPE_START) { + InArgScope = true; + printInstruction(MI, OS); + printAnnotation(OS, Annot); + return; + } + + if (MI->getOpcode() == HSAIL::ARG_SCOPE_END) { + InArgScope = false; + printInstruction(MI, OS); + printAnnotation(OS, Annot); + return; + } + + // Indent any instructions in a call scope. + if (InArgScope) + OS << '\t'; + + printInstruction(MI, OS); + + // Special case call because there appears to be no way to handle variable_ops + // in the generated printer. + if (MI->getOpcode() == HSAIL::CALL) { + // First operand is called function, and should have been automatically + // printed. We just need to specially handle the variable_ops. + unsigned I = 1; + + OS << '('; + + const MCOperand *Op = &MI->getOperand(1); + while (!Op->isImm()) { + printOperand(MI, I++, OS); + Op = &MI->getOperand(I); + } + + // Return value and argument symbols are delimited with a 0 value. + assert((Op->isImm() && Op->getImm() == 0) && + "Unexpected target call instruction operand list!"); + + // Skip the zero. 
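+    // Illustrative operand layout (hypothetical): a call might carry the
+    // operands {&foo, %ret, 0, %arg0, %arg1}; everything before the immediate
+    // 0 was printed as the output-argument list above, and the symbols after
+    // it are printed as the input-argument list below.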
+ ++I; + + OS << ") ("; + + unsigned N = MI->getNumOperands(); + while (I < N) { + printOperand(MI, I++, OS); + + if (I < N) + OS << ", "; + } + + OS << ");"; + } + + printAnnotation(OS, Annot); +} + +void HSAILInstPrinter::printUImmediate(uint64_t Imm, raw_ostream &O) { + O << format("%" PRIu64, Imm); +} + +void HSAILInstPrinter::printSImmediate(int64_t Imm, raw_ostream &O) { + O << format("%" PRId64, Imm); +} + +void HSAILInstPrinter::printU32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << format("%" PRIu32, Op.getImm()); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printU64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << format("%" PRIu64, Op.getImm()); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printS32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << format("%" PRId32, Op.getImm()); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printS64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << format("%" PRId64, Op.getImm()); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printF16(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printF32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isFPImm()) { + O << format("0F%08" PRIx32, FloatToBits(static_cast(Op.getFPImm()))); + return; + } + + assert(!Op.isImm()); + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printF64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isFPImm()) { + O << format("0D%016" PRIx64, DoubleToBits(Op.getFPImm())); + return; + } + + assert(!Op.isImm()); + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printB1(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << (Op.getImm() ? 
'1' : '0'); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printB32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printU32(MI, OpNo, O); +} + +void HSAILInstPrinter::printB64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printU64(MI, OpNo, O); +} + +void HSAILInstPrinter::printU8X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + union Bytes { + uint32_t I; + uint8_t Byte[4]; + } U; + + U.I = Op.getImm(); + O << format("u8x4(%" PRIu8 ",%" PRIu8 ",%" PRIu8 ",%" PRIu8 ")", U.Byte[3], + U.Byte[2], U.Byte[1], U.Byte[0]); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printU8X8(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printU16X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + union Bytes { + uint32_t I; + uint16_t U16[2]; + } U; + + U.I = Op.getImm(); + O << format("u16x2(%" PRIu16 ",%" PRIu16 ")", U.U16[1], U.U16[0]); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printU16X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printU32X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + union Bytes { + uint64_t I; + uint32_t U32[2]; + } U; + + U.I = Op.getImm(); + O << format("u32x2(%" PRIu32 ",%" PRIu32 ")", U.U32[1], U.U32[0]); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printS8X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printS8X8(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printS16X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printS16X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printS32X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printF16X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printF16X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printAddrMode3Op(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &BaseOp = MI->getOperand(OpNo); + const MCOperand &RegOp = MI->getOperand(OpNo + 1); + const MCOperand &OffsetOp = MI->getOperand(OpNo + 2); + + assert(RegOp.isReg() && OffsetOp.isImm()); + + unsigned AddrReg = RegOp.getReg(); + int64_t Offset = OffsetOp.getImm(); + + if (BaseOp.isReg()) { + // FIXME: Why is this allowed to be a register? + assert(BaseOp.getReg() == HSAIL::NoRegister); + } else if (BaseOp.isExpr()) { + O << '['; + BaseOp.getExpr()->print(O, &MAI); + O << ']'; + } else if (BaseOp.isImm()) + O << BaseOp.getImm(); + else + llvm_unreachable("Unexpected type for base address operand"); + + // Have both register and immediate offset. + if (AddrReg != HSAIL::NoRegister && Offset != 0) { + O << '[' << getRegisterName(AddrReg); + + // If the offset is negative, it will be printed with the appropriate - + // already. 
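+    // For example (illustrative): register $s1 with offset 16 prints as
+    // [$s1+16], while offset -8 prints as [$s1-8], the minus sign coming from
+    // formatDec itself.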
+ if (Offset > 0) + O << '+'; + + O << formatDec(Offset) << ']'; + return; + } + + // Only register offset. + if (AddrReg != HSAIL::NoRegister) { + O << '[' << getRegisterName(AddrReg) << ']'; + return; + } + + // Only have immediate offset. + if (Offset != 0) + O << '[' << formatDec(Offset) << ']'; +} + +void HSAILInstPrinter::printVec2Op(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printOperand(MI, OpNo + 0, O); + O << ", "; + printOperand(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printVec3Op(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printOperand(MI, OpNo + 0, O); + O << ", "; + printOperand(MI, OpNo + 1, O); + O << ", "; + printOperand(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printVec4Op(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printOperand(MI, OpNo + 0, O); + O << ", "; + printOperand(MI, OpNo + 1, O); + O << ", "; + printOperand(MI, OpNo + 2, O); + O << ", "; + printOperand(MI, OpNo + 3, O); + O << ')'; +} + +void HSAILInstPrinter::printV2U32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU32(MI, OpNo + 0, O); + O << ", "; + printU32(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printV2F64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF64(MI, OpNo + 0, O); + O << ", "; + printF64(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printV2U64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU64(MI, OpNo + 0, O); + O << ", "; + printU64(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printV2F32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF32(MI, OpNo + 0, O); + O << ", "; + printF32(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printV3U32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU32(MI, OpNo + 0, O); + O << ", "; + printU32(MI, OpNo + 1, O); + O << ", "; + printU32(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printV3F32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF32(MI, OpNo + 0, O); + O << ", "; + printF32(MI, OpNo + 1, O); + O << ", "; + printF32(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printV3U64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU64(MI, OpNo + 0, O); + O << ", "; + printU64(MI, OpNo + 1, O); + O << ", "; + printU64(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printV3F64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF64(MI, OpNo + 0, O); + O << ", "; + printF64(MI, OpNo + 1, O); + O << ", "; + printF64(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printV4U32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU32(MI, OpNo + 0, O); + O << ", "; + printU32(MI, OpNo + 1, O); + O << ", "; + printU32(MI, OpNo + 2, O); + O << ", "; + printU32(MI, OpNo + 3, O); + O << ')'; +} + +void HSAILInstPrinter::printV4F32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF32(MI, OpNo + 0, O); + O << ", "; + printF32(MI, OpNo + 1, O); + O << ", "; + printF32(MI, OpNo + 2, O); + O << ", "; + printF32(MI, OpNo + 3, O); + O << ')'; +} + +void HSAILInstPrinter::printV4U64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU64(MI, OpNo + 0, O); + O << ", "; + printU64(MI, OpNo + 1, O); + O << ", "; + printU64(MI, OpNo + 2, O); + O << ", "; + printU64(MI, OpNo + 3, O); + O << ')'; +} + +void 
HSAILInstPrinter::printV4F64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF64(MI, OpNo + 0, O); + O << ", "; + printF64(MI, OpNo + 1, O); + O << ", "; + printF64(MI, OpNo + 2, O); + O << ", "; + printF64(MI, OpNo + 3, O); + O << ')'; +} + +void HSAILInstPrinter::printFTZ(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << "_ftz"; +} + +void HSAILInstPrinter::printNoNull(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << "_nonull"; +} + +void HSAILInstPrinter::printV4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << "_v4"; +} + +void HSAILInstPrinter::printBrigAlignment(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Align = MI->getOperand(OpNo).getImm(); + if (Align != 1) + O << "_align(" << formatDec(Align) << ')'; +} + +static bool isNaturalAlignment(BrigType BT, unsigned Align) { + switch (Align) { + case 4: { + switch (BT) { + case BRIG_TYPE_U32: + case BRIG_TYPE_F32: + case BRIG_TYPE_B32: + case BRIG_TYPE_S32: + case BRIG_TYPE_U8X4: + case BRIG_TYPE_S8X4: + case BRIG_TYPE_F16X2: + case BRIG_TYPE_S16X2: + case BRIG_TYPE_U16X2: + return true; + default: + return false; + } + } + case 8: { + switch (BT) { + case BRIG_TYPE_U64: + case BRIG_TYPE_B64: + case BRIG_TYPE_F64: + case BRIG_TYPE_S64: + case BRIG_TYPE_F16X4: + case BRIG_TYPE_F32X2: + case BRIG_TYPE_ROIMG: + case BRIG_TYPE_RWIMG: + case BRIG_TYPE_S16X4: + case BRIG_TYPE_S32X2: + case BRIG_TYPE_S8X8: + case BRIG_TYPE_SAMP: + case BRIG_TYPE_SIG32: + case BRIG_TYPE_SIG64: + case BRIG_TYPE_U16X4: + case BRIG_TYPE_U32X2: + case BRIG_TYPE_U8X8: + case BRIG_TYPE_WOIMG: + return true; + default: + return false; + } + } + case 1: { + switch (BT) { + case BRIG_TYPE_B1: + case BRIG_TYPE_B8: + case BRIG_TYPE_S8: + case BRIG_TYPE_U8: + return true; + default: + return false; + } + } + case 2: { + switch (BT) { + case BRIG_TYPE_U16: + case BRIG_TYPE_B16: + case BRIG_TYPE_S16: + case BRIG_TYPE_F16: + return true; + default: + return false; + } + } + case 16: { + switch (BT) { + case BRIG_TYPE_B128: + case BRIG_TYPE_F16X8: + case BRIG_TYPE_F32X4: + case BRIG_TYPE_F64X2: + case BRIG_TYPE_S16X8: + case BRIG_TYPE_S32X4: + case BRIG_TYPE_S64X2: + case BRIG_TYPE_S8X16: + case BRIG_TYPE_U16X8: + case BRIG_TYPE_U32X4: + case BRIG_TYPE_U64X2: + case BRIG_TYPE_U8X16: + return true; + default: + return false; + } + } + default: + return false; + } +} + +void HSAILInstPrinter::printArgDeclAlignment(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Align = MI->getOperand(OpNo).getImm(); + + int TypeLengthIdx = + HSAIL::getNamedOperandIdx(MI->getOpcode(), HSAIL::OpName::TypeLength); + BrigType BT = static_cast(MI->getOperand(TypeLengthIdx).getImm()); + + // Don't print align declaration if it uses the alignment implied in this + // context. This isn't necessary, but it matches what libHSAIL's disassembler + // produces. 
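+  // For example (per the natural-alignment table above): align(4) on a u32
+  // declaration is implied and therefore omitted, while align(8) on a u32
+  // declaration would still be printed.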
+ if (!isNaturalAlignment(BT, Align)) + O << "align(" << formatDec(Align) << ") "; +} + +void HSAILInstPrinter::printArraySize(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + int64_t Size = MI->getOperand(OpNo).getImm(); + if (Size != 0) + O << '[' << Size << ']'; +} + +void HSAILInstPrinter::printEquiv(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Equiv = MI->getOperand(OpNo).getImm(); + if (Equiv != 0) + O << "_equiv(" << formatDec(Equiv) << ')'; +} + +void HSAILInstPrinter::printBrigAllocation(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ALLOCATION_NONE: + O << "NONE"; + break; + case BRIG_ALLOCATION_PROGRAM: + O << "PROGRAM"; + break; + case BRIG_ALLOCATION_AGENT: + O << "AGENT"; + break; + case BRIG_ALLOCATION_AUTOMATIC: + O << "AUTOMATIC"; + break; + } +} + +void HSAILInstPrinter::printBrigAluModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ALU_FTZ: + O << "FTZ"; + break; + } +} + +void HSAILInstPrinter::printBrigAtomicOperation(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ATOMIC_ADD: + O << "_add"; + break; + case BRIG_ATOMIC_AND: + O << "_and"; + break; + case BRIG_ATOMIC_CAS: + O << "_cas"; + break; + case BRIG_ATOMIC_EXCH: + O << "_exch"; + break; + case BRIG_ATOMIC_LD: + O << "_ld"; + break; + case BRIG_ATOMIC_MAX: + O << "_max"; + break; + case BRIG_ATOMIC_MIN: + O << "_min"; + break; + case BRIG_ATOMIC_OR: + O << "_or"; + break; + case BRIG_ATOMIC_ST: + O << "_st"; + break; + case BRIG_ATOMIC_SUB: + O << "_sub"; + break; + case BRIG_ATOMIC_WRAPDEC: + O << "_wrapdec"; + break; + case BRIG_ATOMIC_WRAPINC: + O << "_wrapinc"; + break; + case BRIG_ATOMIC_XOR: + O << "_xor"; + break; + case BRIG_ATOMIC_WAIT_EQ: + O << "_wait_eq"; + break; + case BRIG_ATOMIC_WAIT_NE: + O << "_wait_ne"; + break; + case BRIG_ATOMIC_WAIT_LT: + O << "_wait_lt"; + break; + case BRIG_ATOMIC_WAIT_GTE: + O << "_wait_gte"; + break; + case BRIG_ATOMIC_WAITTIMEOUT_EQ: + O << "_waittimeout_eq"; + break; + case BRIG_ATOMIC_WAITTIMEOUT_NE: + O << "_waittimeout_ne"; + break; + case BRIG_ATOMIC_WAITTIMEOUT_LT: + O << "_waittimeout_lt"; + break; + case BRIG_ATOMIC_WAITTIMEOUT_GTE: + O << "_waittimeout_gte"; + break; + } +} + +void HSAILInstPrinter::printBrigCompareOperation(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_COMPARE_EQ: + O << "_eq"; + break; + case BRIG_COMPARE_NE: + O << "_ne"; + break; + case BRIG_COMPARE_LT: + O << "_lt"; + break; + case BRIG_COMPARE_LE: + O << "_le"; + break; + case BRIG_COMPARE_GT: + O << "_gt"; + break; + case BRIG_COMPARE_GE: + O << "_ge"; + break; + case BRIG_COMPARE_EQU: + O << "_equ"; + break; + case BRIG_COMPARE_NEU: + O << "_neu"; + break; + case BRIG_COMPARE_LTU: + O << "_ltu"; + break; + case BRIG_COMPARE_LEU: + O << "_leu"; + break; + case BRIG_COMPARE_GTU: + O << "_gtu"; + break; + case BRIG_COMPARE_GEU: + O << "_geu"; + break; + case BRIG_COMPARE_NUM: + O << "_num"; + break; + case BRIG_COMPARE_NAN: + O << "_nan"; + break; + case BRIG_COMPARE_SEQ: + O << "_seq"; + break; + case BRIG_COMPARE_SNE: + O << "_sne"; + break; + case BRIG_COMPARE_SLT: + O << "_slt"; + break; + case BRIG_COMPARE_SLE: + O << "_sle"; + break; + case BRIG_COMPARE_SGT: + O << "_sgt"; + break; + case BRIG_COMPARE_SGE: + O << "_sge"; + break; + case BRIG_COMPARE_SGEU: + O << "_sgeu"; + break; + case BRIG_COMPARE_SEQU: + O 
<< "_sequ"; + break; + case BRIG_COMPARE_SNEU: + O << "_sneu"; + break; + case BRIG_COMPARE_SLTU: + O << "_sltu"; + break; + case BRIG_COMPARE_SLEU: + O << "_sleu"; + break; + case BRIG_COMPARE_SNUM: + O << "_snum"; + break; + case BRIG_COMPARE_SNAN: + O << "_snan"; + break; + case BRIG_COMPARE_SGTU: + O << "_sgtu"; + break; + } +} + +void HSAILInstPrinter::printBrigControlDirective(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_CONTROL_ENABLEBREAKEXCEPTIONS: + O << "enablebreakexceptions"; + break; + case BRIG_CONTROL_ENABLEDETECTEXCEPTIONS: + O << "enabledetectexceptions"; + break; + case BRIG_CONTROL_MAXDYNAMICGROUPSIZE: + O << "maxdynamicgroupsize"; + break; + case BRIG_CONTROL_MAXFLATGRIDSIZE: + O << "maxflatgridsize"; + break; + case BRIG_CONTROL_MAXFLATWORKGROUPSIZE: + O << "maxflatworkgroupsize"; + break; + case BRIG_CONTROL_REQUIREDDIM: + O << "requireddim"; + break; + case BRIG_CONTROL_REQUIREDGRIDSIZE: + O << "requiredgridsize"; + break; + case BRIG_CONTROL_REQUIREDWORKGROUPSIZE: + O << "requiredworkgroupsize"; + break; + case BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS: + O << "requirenopartialworkgroups"; + break; + } +} + +void HSAILInstPrinter::printBrigExecutableModifierMask(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_EXECUTABLE_DEFINITION: + O << "DEFINITION"; + break; + } +} + +void HSAILInstPrinter::printBrigImageChannelOrder(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_CHANNEL_ORDER_A: + O << "a"; + break; + case BRIG_CHANNEL_ORDER_R: + O << "r"; + break; + case BRIG_CHANNEL_ORDER_RX: + O << "rx"; + break; + case BRIG_CHANNEL_ORDER_RG: + O << "rg"; + break; + case BRIG_CHANNEL_ORDER_RGX: + O << "rgx"; + break; + case BRIG_CHANNEL_ORDER_RA: + O << "ra"; + break; + case BRIG_CHANNEL_ORDER_RGB: + O << "rgb"; + break; + case BRIG_CHANNEL_ORDER_RGBX: + O << "rgbx"; + break; + case BRIG_CHANNEL_ORDER_RGBA: + O << "rgba"; + break; + case BRIG_CHANNEL_ORDER_BGRA: + O << "bgra"; + break; + case BRIG_CHANNEL_ORDER_ARGB: + O << "argb"; + break; + case BRIG_CHANNEL_ORDER_ABGR: + O << "abgr"; + break; + case BRIG_CHANNEL_ORDER_SRGB: + O << "srgb"; + break; + case BRIG_CHANNEL_ORDER_SRGBX: + O << "srgbx"; + break; + case BRIG_CHANNEL_ORDER_SRGBA: + O << "srgba"; + break; + case BRIG_CHANNEL_ORDER_SBGRA: + O << "sbgra"; + break; + case BRIG_CHANNEL_ORDER_INTENSITY: + O << "intensity"; + break; + case BRIG_CHANNEL_ORDER_LUMINANCE: + O << "luminance"; + break; + case BRIG_CHANNEL_ORDER_DEPTH: + O << "depth"; + break; + case BRIG_CHANNEL_ORDER_DEPTH_STENCIL: + O << "depth_stencil"; + break; + } +} + +void HSAILInstPrinter::printBrigImageChannelType(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_CHANNEL_TYPE_SNORM_INT8: + O << "snorm_int8"; + break; + case BRIG_CHANNEL_TYPE_SNORM_INT16: + O << "snorm_int16"; + break; + case BRIG_CHANNEL_TYPE_UNORM_INT8: + O << "unorm_int8"; + break; + case BRIG_CHANNEL_TYPE_UNORM_INT16: + O << "unorm_int16"; + break; + case BRIG_CHANNEL_TYPE_UNORM_INT24: + O << "unorm_int24"; + break; + case BRIG_CHANNEL_TYPE_UNORM_SHORT_555: + O << "unorm_short_555"; + break; + case BRIG_CHANNEL_TYPE_UNORM_SHORT_565: + O << "unorm_short_565"; + break; + case BRIG_CHANNEL_TYPE_UNORM_INT_101010: + O << "unorm_int_101010"; + break; + case BRIG_CHANNEL_TYPE_SIGNED_INT8: + O << "signed_int8"; + break; + case 
BRIG_CHANNEL_TYPE_SIGNED_INT16: + O << "signed_int16"; + break; + case BRIG_CHANNEL_TYPE_SIGNED_INT32: + O << "signed_int32"; + break; + case BRIG_CHANNEL_TYPE_UNSIGNED_INT8: + O << "unsigned_int8"; + break; + case BRIG_CHANNEL_TYPE_UNSIGNED_INT16: + O << "unsigned_int16"; + break; + case BRIG_CHANNEL_TYPE_UNSIGNED_INT32: + O << "unsigned_int32"; + break; + case BRIG_CHANNEL_TYPE_HALF_FLOAT: + O << "half_float"; + break; + case BRIG_CHANNEL_TYPE_FLOAT: + O << "float"; + break; + } +} + +void HSAILInstPrinter::printBrigImageGeometry(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_GEOMETRY_1D: + O << "_1d"; + break; + case BRIG_GEOMETRY_2D: + O << "_2d"; + break; + case BRIG_GEOMETRY_3D: + O << "_3d"; + break; + case BRIG_GEOMETRY_1DA: + O << "_1da"; + break; + case BRIG_GEOMETRY_2DA: + O << "_2da"; + break; + case BRIG_GEOMETRY_1DB: + O << "_1db"; + break; + case BRIG_GEOMETRY_2DDEPTH: + O << "_2ddepth"; + break; + case BRIG_GEOMETRY_2DADEPTH: + O << "_2dadepth"; + break; + case BRIG_GEOMETRY_UNKNOWN: + O << "_unknown"; + break; + } +} + +void HSAILInstPrinter::printBrigImageQuery(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_IMAGE_QUERY_WIDTH: + O << "width"; + break; + case BRIG_IMAGE_QUERY_HEIGHT: + O << "height"; + break; + case BRIG_IMAGE_QUERY_DEPTH: + O << "depth"; + break; + case BRIG_IMAGE_QUERY_ARRAY: + O << "array"; + break; + case BRIG_IMAGE_QUERY_CHANNELORDER: + O << "channelorder"; + break; + case BRIG_IMAGE_QUERY_CHANNELTYPE: + O << "channeltype"; + break; + } +} + +void HSAILInstPrinter::printBrigLinkage(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_LINKAGE_NONE: + O << "NONE"; + break; + case BRIG_LINKAGE_PROGRAM: + O << "PROGRAM"; + break; + case BRIG_LINKAGE_MODULE: + O << "MODULE"; + break; + case BRIG_LINKAGE_FUNCTION: + O << "FUNCTION"; + break; + case BRIG_LINKAGE_ARG: + O << "ARG"; + break; + } +} + +void HSAILInstPrinter::printBrigMachineModel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_MACHINE_SMALL: + O << "$small"; + break; + case BRIG_MACHINE_LARGE: + O << "$large"; + break; + } +} + +void HSAILInstPrinter::printBrigMemoryModifierMask(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_MEMORY_CONST: + O << "CONST"; + break; + } +} + +void HSAILInstPrinter::printBrigMemoryOrder(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_MEMORY_ORDER_NONE: + O << "_"; + break; + case BRIG_MEMORY_ORDER_RELAXED: + O << "_rlx"; + break; + case BRIG_MEMORY_ORDER_SC_ACQUIRE: + O << "_scacq"; + break; + case BRIG_MEMORY_ORDER_SC_RELEASE: + O << "_screl"; + break; + case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: + O << "_scar"; + break; + } +} + +void HSAILInstPrinter::printBrigMemoryScope(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_MEMORY_SCOPE_WAVEFRONT: + O << "_wave"; + break; + case BRIG_MEMORY_SCOPE_WORKGROUP: + O << "_wg"; + break; + case BRIG_MEMORY_SCOPE_AGENT: + O << "_agent"; + break; + case BRIG_MEMORY_SCOPE_SYSTEM: + O << "_system"; + break; + } +} + +void HSAILInstPrinter::printBrigPack(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_PACK_NONE: + O << "_"; + break; + case BRIG_PACK_PP: + O << 
"_pp"; + break; + case BRIG_PACK_PS: + O << "_ps"; + break; + case BRIG_PACK_SP: + O << "_sp"; + break; + case BRIG_PACK_SS: + O << "_ss"; + break; + case BRIG_PACK_S: + O << "_s"; + break; + case BRIG_PACK_P: + O << "_p"; + break; + case BRIG_PACK_PPSAT: + O << "_pp_sat"; + break; + case BRIG_PACK_PSSAT: + O << "_ps_sat"; + break; + case BRIG_PACK_SPSAT: + O << "_sp_sat"; + break; + case BRIG_PACK_SSSAT: + O << "_ss_sat"; + break; + case BRIG_PACK_SSAT: + O << "_s_sat"; + break; + case BRIG_PACK_PSAT: + O << "_p_sat"; + break; + } +} + +void HSAILInstPrinter::printBrigProfile(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_PROFILE_BASE: + O << "$base"; + break; + case BRIG_PROFILE_FULL: + O << "$full"; + break; + } +} + +void HSAILInstPrinter::printBrigRound(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ROUND_NONE: + break; + case BRIG_ROUND_FLOAT_DEFAULT: + break; + case BRIG_ROUND_FLOAT_NEAR_EVEN: + O << "_near"; + break; + case BRIG_ROUND_FLOAT_ZERO: + O << "_zero"; + break; + case BRIG_ROUND_FLOAT_PLUS_INFINITY: + O << "_up"; + break; + case BRIG_ROUND_FLOAT_MINUS_INFINITY: + O << "_down"; + break; + case BRIG_ROUND_INTEGER_NEAR_EVEN: + O << "_neari"; + break; + case BRIG_ROUND_INTEGER_ZERO: + // This is the default for integer ops, omit it. + // O << "_zeroi"; + break; + case BRIG_ROUND_INTEGER_PLUS_INFINITY: + O << "_upi"; + break; + case BRIG_ROUND_INTEGER_MINUS_INFINITY: + O << "_downi"; + break; + case BRIG_ROUND_INTEGER_NEAR_EVEN_SAT: + O << "_neari_sat"; + break; + case BRIG_ROUND_INTEGER_ZERO_SAT: + O << "_zeroi_sat"; + break; + case BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT: + O << "_upi_sat"; + break; + case BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT: + O << "_downi_sat"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN: + O << "_sneari"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_ZERO: + O << "_szeroi"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY: + O << "_supi"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY: + O << "_sdowni"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT: + O << "_sneari_sat"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT: + O << "_szeroi_sat"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT: + O << "_supi_sat"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT: + O << "_sdowni_sat"; + break; + } +} + +void HSAILInstPrinter::printBrigSamplerAddressing(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ADDRESSING_UNDEFINED: + O << "UNDEFINED"; + break; + case BRIG_ADDRESSING_CLAMP_TO_EDGE: + O << "CLAMP_TO_EDGE"; + break; + case BRIG_ADDRESSING_CLAMP_TO_BORDER: + O << "CLAMP_TO_BORDER"; + break; + case BRIG_ADDRESSING_REPEAT: + O << "REPEAT"; + break; + case BRIG_ADDRESSING_MIRRORED_REPEAT: + O << "MIRRORED_REPEAT"; + break; + } +} + +void HSAILInstPrinter::printBrigSamplerCoordNormalization(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_COORD_UNNORMALIZED: + O << "unnormalized"; + break; + case BRIG_COORD_NORMALIZED: + O << "normalized"; + break; + } +} + +void HSAILInstPrinter::printBrigSamplerFilter(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_FILTER_NEAREST: + O << "nearest"; + break; + case BRIG_FILTER_LINEAR: + O << "linear"; + break; + } +} + +void 
HSAILInstPrinter::printBrigSamplerQuery(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_SAMPLER_QUERY_ADDRESSING: + O << "addressing"; + break; + case BRIG_SAMPLER_QUERY_COORD: + O << "coord"; + break; + case BRIG_SAMPLER_QUERY_FILTER: + O << "filter"; + break; + } +} + +void HSAILInstPrinter::printBrigSegCvtModifierMask(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_SEG_CVT_NONULL: + O << "_nonull"; + break; + } +} + +void HSAILInstPrinter::printBrigSegment(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case HSAILAS::FLAT_ADDRESS: + // Assumed default. + break; + case HSAILAS::GLOBAL_ADDRESS: + O << "_global"; + break; + case HSAILAS::READONLY_ADDRESS: + O << "_readonly"; + break; + case HSAILAS::KERNARG_ADDRESS: + O << "_kernarg"; + break; + case HSAILAS::GROUP_ADDRESS: + O << "_group"; + break; + case HSAILAS::PRIVATE_ADDRESS: + O << "_private"; + break; + case HSAILAS::SPILL_ADDRESS: + O << "_spill"; + break; + case HSAILAS::ARG_ADDRESS: + O << "_arg"; + break; + case HSAILAS::REGION_ADDRESS: { + // For now, the only non-flat implied segment appears to be region. + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + if (Desc.TSFlags & HSAILInstrFlags::HasDefaultSegment) + break; + + O << "_region"; + break; + } + default: + llvm_unreachable("bad segment value"); + } + +#if 0 + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_SEGMENT_GLOBAL: + O << "_global"; + break; + case BRIG_SEGMENT_READONLY: + O << "_readonly"; + break; + case BRIG_SEGMENT_KERNARG: + O << "_kernarg"; + break; + case BRIG_SEGMENT_GROUP: + O << "_group"; + break; + case BRIG_SEGMENT_PRIVATE: + O << "_private"; + break; + case BRIG_SEGMENT_SPILL: + O << "_spill"; + break; + case BRIG_SEGMENT_ARG: + O << "_arg"; + break; + case BRIG_SEGMENT_AMD_GCN: + O << "_region"; + break; + default: + llvm_unreachable("bad segment value"); + } +#endif +} + +void HSAILInstPrinter::printBrigType(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_TYPE_U8: + O << "_u8"; + break; + case BRIG_TYPE_U16: + O << "_u16"; + break; + case BRIG_TYPE_U32: + O << "_u32"; + break; + case BRIG_TYPE_U64: + O << "_u64"; + break; + case BRIG_TYPE_S8: + O << "_s8"; + break; + case BRIG_TYPE_S16: + O << "_s16"; + break; + case BRIG_TYPE_S32: + O << "_s32"; + break; + case BRIG_TYPE_S64: + O << "_s64"; + break; + case BRIG_TYPE_F16: + O << "_f16"; + break; + case BRIG_TYPE_F32: + O << "_f32"; + break; + case BRIG_TYPE_F64: + O << "_f64"; + break; + case BRIG_TYPE_B1: + O << "_b1"; + break; + case BRIG_TYPE_B8: + O << "_b8"; + break; + case BRIG_TYPE_B16: + O << "_b16"; + break; + case BRIG_TYPE_B32: + O << "_b32"; + break; + case BRIG_TYPE_B64: + O << "_b64"; + break; + case BRIG_TYPE_B128: + O << "_b128"; + break; + case BRIG_TYPE_SAMP: + O << "_samp"; + break; + case BRIG_TYPE_ROIMG: + O << "_roimg"; + break; + case BRIG_TYPE_WOIMG: + O << "_woimg"; + break; + case BRIG_TYPE_RWIMG: + O << "_rwimg"; + break; + case BRIG_TYPE_SIG32: + O << "_sig32"; + break; + case BRIG_TYPE_SIG64: + O << "_sig64"; + break; + case BRIG_TYPE_U8X4: + O << "_u8x4"; + break; + case BRIG_TYPE_U8X8: + O << "_u8x8"; + break; + case BRIG_TYPE_U8X16: + O << "_u8x16"; + break; + case BRIG_TYPE_U16X2: + O << "_u16x2"; + break; + case BRIG_TYPE_U16X4: + O << "_u16x4"; + break; + case BRIG_TYPE_U16X8: + O << "_u16x8"; + break; + case BRIG_TYPE_U32X2: 
+ O << "_u32x2"; + break; + case BRIG_TYPE_U32X4: + O << "_u32x4"; + break; + case BRIG_TYPE_U64X2: + O << "_u64x2"; + break; + case BRIG_TYPE_S8X4: + O << "_s8x4"; + break; + case BRIG_TYPE_S8X8: + O << "_s8x8"; + break; + case BRIG_TYPE_S8X16: + O << "_s8x16"; + break; + case BRIG_TYPE_S16X2: + O << "_s16x2"; + break; + case BRIG_TYPE_S16X4: + O << "_s16x4"; + break; + case BRIG_TYPE_S16X8: + O << "_s16x8"; + break; + case BRIG_TYPE_S32X2: + O << "_s32x2"; + break; + case BRIG_TYPE_S32X4: + O << "_s32x4"; + break; + case BRIG_TYPE_S64X2: + O << "_s64x2"; + break; + case BRIG_TYPE_F16X2: + O << "_f16x2"; + break; + case BRIG_TYPE_F16X4: + O << "_f16x4"; + break; + case BRIG_TYPE_F16X8: + O << "_f16x8"; + break; + case BRIG_TYPE_F32X2: + O << "_f32x2"; + break; + case BRIG_TYPE_F32X4: + O << "_f32x4"; + break; + case BRIG_TYPE_F64X2: + O << "_f64x2"; + break; + } +} + +void HSAILInstPrinter::printBrigVariableModifierMask(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_VARIABLE_DEFINITION: + O << "DEFINITION"; + break; + case BRIG_VARIABLE_CONST: + O << "CONST"; + break; + } +} + +static void printBrigWidthImpl(raw_ostream &O, unsigned Width) { + switch (Width) { + case BRIG_WIDTH_NONE: + O << "_width(NONE)"; + break; + case BRIG_WIDTH_1: + O << "_width(1)"; + break; + case BRIG_WIDTH_2: + O << "_width(2)"; + break; + case BRIG_WIDTH_4: + O << "_width(4)"; + break; + case BRIG_WIDTH_8: + O << "_width(8)"; + break; + case BRIG_WIDTH_16: + O << "_width(16)"; + break; + case BRIG_WIDTH_32: + O << "_width(32)"; + break; + case BRIG_WIDTH_64: + O << "_width(64)"; + break; + case BRIG_WIDTH_128: + O << "_width(128)"; + break; + case BRIG_WIDTH_256: + O << "_width(256)"; + break; + case BRIG_WIDTH_512: + O << "_width(512)"; + break; + case BRIG_WIDTH_1024: + O << "_width(1024)"; + break; + case BRIG_WIDTH_2048: + O << "_width(2048)"; + break; + case BRIG_WIDTH_4096: + O << "_width(4096)"; + break; + case BRIG_WIDTH_8192: + O << "_width(8192)"; + break; + case BRIG_WIDTH_16384: + O << "_width(16384)"; + break; + case BRIG_WIDTH_32768: + O << "_width(32768)"; + break; + case BRIG_WIDTH_65536: + O << "_width(65536)"; + break; + case BRIG_WIDTH_131072: + O << "_width(131072)"; + break; + case BRIG_WIDTH_262144: + O << "_width(262144)"; + break; + case BRIG_WIDTH_524288: + O << "_width(524288)"; + break; + case BRIG_WIDTH_1048576: + O << "_width(1048576)"; + break; + case BRIG_WIDTH_2097152: + O << "_width(2097152)"; + break; + case BRIG_WIDTH_4194304: + O << "_width(4194304)"; + break; + case BRIG_WIDTH_8388608: + O << "_width(8388608)"; + break; + case BRIG_WIDTH_16777216: + O << "_width(16777216)"; + break; + case BRIG_WIDTH_33554432: + O << "_width(33554432)"; + break; + case BRIG_WIDTH_67108864: + O << "_width(67108864)"; + break; + case BRIG_WIDTH_134217728: + O << "_width(134217728)"; + break; + case BRIG_WIDTH_268435456: + O << "_width(268435456)"; + break; + case BRIG_WIDTH_536870912: + O << "_width(536870912)"; + break; + case BRIG_WIDTH_1073741824: + O << "_width(1073741824)"; + break; + case BRIG_WIDTH_2147483648: + O << "_width(2147483648)"; + break; + case BRIG_WIDTH_WAVESIZE: + O << "_width(WAVESIZE)"; + break; + case BRIG_WIDTH_ALL: + O << "_width(all)"; + break; + } +} + +void HSAILInstPrinter::printBrigWidth(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Width = MI->getOperand(OpNo).getImm(); + + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint32_t DefaultWidth = (Desc.TSFlags & 
HSAILInstrFlags::WidthAttr) >> + Log2_32(HSAILInstrFlags::WidthAttrLo); + + // Don't print the width modifier if it is the default for the instruction. + switch (DefaultWidth) { + case HSAILWidthAttrFlags::WidthAttrOne: + if (Width != BRIG_WIDTH_1) + printBrigWidthImpl(O, Width); + return; + + case HSAILWidthAttrFlags::WidthAttrAll: + if (Width != BRIG_WIDTH_ALL) + printBrigWidthImpl(O, Width); + return; + + case HSAILWidthAttrFlags::WidthAttrWaveSize: + if (Width != BRIG_WIDTH_WAVESIZE) + printBrigWidthImpl(O, Width); + return; + + case HSAILWidthAttrFlags::WidthAttrNone: + printBrigWidthImpl(O, Width); + return; + } +} + +void HSAILInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + O << getRegisterName(Op.getReg()); + } else if (Op.isExpr()) { + const MCExpr *Exp = Op.getExpr(); + Exp->print(O, &MAI); + } else if (Op.isImm() || Op.isFPImm()) { + llvm_unreachable("Immediate should have been handled by special printer"); + } else { + llvm_unreachable("unknown operand type in printOperand"); + } +} + +#include "HSAILGenAsmWriter.inc" Index: lib/Target/HSAIL/InstPrinter/LLVMBuild.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/HSAIL/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HSAILAsmPrinter +parent = HSAIL +required_libraries = MC Support +add_to_library_groups = HSAIL + Index: lib/Target/HSAIL/LLVMBuild.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/LLVMBuild.txt @@ -0,0 +1,32 @@ +;===- ./lib/Target/HSAIL/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = HSAIL +parent = Target +has_asmprinter = 1 + +[component_1] +type = Library +name = HSAILCodeGen +parent = HSAIL +required_libraries = AsmPrinter CodeGen Core IPO HSAILAsmPrinter HSAILDesc MC SelectionDAG Support Target +add_to_library_groups = HSAIL Index: lib/Target/HSAIL/MCTargetDesc/BRIGDwarfStreamer.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/BRIGDwarfStreamer.h @@ -0,0 +1,43 @@ +//===-- BRIGDwarfStreamer.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef BRIG_DWARF_STREAMER +#define BRIG_DWARF_STREAMER + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + +class RawVectorOstream; + +class BRIGDwarfStreamer : public MCELFStreamer { +private: + RawVectorOstream *dwarfStream; + +public: + BRIGDwarfStreamer(MCContext &Context, MCAsmBackend &TAB, + RawVectorOstream &RVOS, MCCodeEmitter *Emitter); + + void InitSections(bool NoExecStack) override; + void Finish(); + + RawVectorOstream *getDwarfStream(); + + // support for LLVM-style RTTI operations like dyn_cast + inline static bool classof(const BRIGDwarfStreamer *) { return true; } + inline static bool classof(const MCStreamer *streamer) { return true; } +}; + +MCStreamer *createBRIGDwarfStreamer(MCContext &Context, MCAsmBackend &MAB, + RawVectorOstream &RVOS, MCCodeEmitter *CE, + bool RelaxAll); + +} // namespace llvm + +#endif Index: lib/Target/HSAIL/MCTargetDesc/BRIGDwarfStreamer.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/BRIGDwarfStreamer.cpp @@ -0,0 +1,66 @@ +//===-- BRIGDwarfStreamer.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BRIGDwarfStreamer.h" + +#include "RawVectorOstream.h" + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" + + +using namespace llvm; + +BRIGDwarfStreamer::BRIGDwarfStreamer(MCContext &Context, MCAsmBackend &TAB, + RawVectorOstream &RVOS, + MCCodeEmitter *Emitter) + : MCELFStreamer(Context, TAB, RVOS, Emitter), + dwarfStream(&RVOS) {} + +RawVectorOstream *BRIGDwarfStreamer::getDwarfStream() { + raw_ostream &Strm = getAssembler().getWriter().getStream(); + + // We must ensure MC layer is writing to the same stream. 
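+  // (Clarifying assumption: dwarfStream is the RawVectorOstream that was
+  // handed to MCELFStreamer in the constructor, so the assembler's object
+  // writer should still be emitting into that very stream.)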
+  assert(&Strm == static_cast<raw_ostream *>(dwarfStream) &&
+         "MC layer doesn't write to DWARF stream");
+  return dwarfStream;
+}
+
+void BRIGDwarfStreamer::InitSections(bool NoExecStack) {
+  const MCSectionELF *codeSection = getContext().getELFSection(
+      ".brigcode", ELF::SHT_NOBITS, 0);
+  const MCSectionELF *directivesSection = getContext().getELFSection(
+      ".brigdirectives", ELF::SHT_NOBITS, 0);
+  SwitchSection(codeSection);
+  SwitchSection(directivesSection);
+  SwitchSection(codeSection);
+}
+
+#if 0
+MCStreamer::MCStreamerKind BRIGDwarfStreamer::getStreamerKind() const {
+  return MCStreamer::BRIGDwarfStreamer;
+}
+#endif
+
+void BRIGDwarfStreamer::Finish() {
+  MCELFStreamer::Finish();
+  // Flush all of the captured DWARF data.
+  dwarfStream->flush();
+  // Stop writing to the forwarded stream, if one was provided.
+  dwarfStream->releaseStream();
+}
+
+MCStreamer *llvm::createBRIGDwarfStreamer(MCContext &Context, MCAsmBackend &MAB,
+                                          RawVectorOstream &RVOS,
+                                          MCCodeEmitter *CE, bool RelaxAll) {
+  BRIGDwarfStreamer *S = new BRIGDwarfStreamer(Context, MAB, RVOS, CE);
+  if (RelaxAll)
+    S->getAssembler().setRelaxAll(true);
+  return S;
+}
Index: lib/Target/HSAIL/MCTargetDesc/CMakeLists.txt
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,16 @@
+
+add_llvm_library(LLVMHSAILDesc
+  BRIGDwarfStreamer.cpp
+
+  HSAILAsmBackend.cpp
+  HSAILELFObjectWriter.cpp
+  HSAILMCCodeEmitter.cpp
+  HSAILMCTargetDesc.cpp
+  HSAILTargetStreamer.cpp
+  HSAILMCAsmInfo.cpp
+  RawVectorOstream.cpp
+  )
+
+# FIXME: How does this work for every other target? None of them need
+# to specify this dependency.
+target_link_libraries(LLVMHSAILDesc PRIVATE LLVMHSAILAsmPrinter)
Index: lib/Target/HSAIL/MCTargetDesc/HSAILAsmBackend.h
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/MCTargetDesc/HSAILAsmBackend.h
@@ -0,0 +1,72 @@
+//===-- HSAILAsmBackend.h - HSAIL Assembler Backend -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +#ifndef _HSAIL_ASM_BACKEND_H_ +#define _HSAIL_ASM_BACKEND_H_ + +#include "HSAILELFObjectWriter.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSectionELF.h" + +using namespace llvm; + +namespace { +class HSAILAsmBackend : public MCAsmBackend { +public: + HSAILAsmBackend(const Target &T); + + unsigned getNumFixupKinds() const override { + assert(!"When do we hit this?"); + return 0; + } + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + + bool mayNeedRelaxation(const MCInst &Inst) const override; + + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override; + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; +}; + +class ELFHSAILAsmBackend : public HSAILAsmBackend { +public: + ELFHSAILAsmBackend(const Target &T) : HSAILAsmBackend(T) {} +}; + +class ELFHSAIL_32AsmBackend : public ELFHSAILAsmBackend { +public: + ELFHSAIL_32AsmBackend(const Target &T) : ELFHSAILAsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createELFObjectWriter( + new HSAILELFObjectWriter(false, ELF::EM_HSAIL, false), OS, + /*IsLittleEndian*/ true); + } +}; + +class ELFHSAIL_64AsmBackend : public ELFHSAILAsmBackend { +public: + ELFHSAIL_64AsmBackend(const Target &T) : ELFHSAILAsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createELFObjectWriter( + new HSAILELFObjectWriter(true, ELF::EM_HSAIL_64, false), OS, + /*IsLittleEndian*/ true); + } +}; +} // end anonymous namespace + +#endif // _HSAIL_ASM_BACKEND_H_ Index: lib/Target/HSAIL/MCTargetDesc/HSAILAsmBackend.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILAsmBackend.cpp @@ -0,0 +1,111 @@ +//===-- HSAILAsmBackend.h - HSAIL Assembler Backend -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "HSAILAsmBackend.h" +using namespace llvm; + +HSAILAsmBackend::HSAILAsmBackend(const Target &T) {} + +/// createObjectWriter - Create a new MCObjectWriter instance for use by the +/// assembler backend to emit the final object file. 
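+/// Note: the ELF-specific subclasses (ELFHSAIL_32AsmBackend and
+/// ELFHSAIL_64AsmBackend) override this, so the base implementation below is
+/// not expected to be reached.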
+MCObjectWriter *HSAILAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { + assert(!"When do we hit this?"); + return nullptr; +} + +// pulled from x86asmbackend.cpp, used in ApplyFixup +// +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("Invalid fixup kind!"); + case FK_PCRel_1: + case FK_Data_1: + return 0; + case FK_PCRel_2: + case FK_Data_2: + return 1; + case FK_PCRel_4: + // case X86::reloc_riprel_4byte: + // case X86::reloc_riprel_4byte_movq_load: + // case X86::reloc_signed_4byte: + // case X86::reloc_global_offset_table: + case FK_Data_4: + return 2; + case FK_PCRel_8: + case FK_Data_8: + return 3; + } +} + +/// applyFixup - Apply the \arg Value for given \arg Fixup into the provided +/// data fragment, at the offset specified by the fixup and following the +/// fixup kind as appropriate. +void HSAILAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { + // pulled from x86asmbackend.cpp + unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); + + assert(Fixup.getOffset() + Size <= DataSize && "Invalid fixup offset!"); + + // Check that uppper bits are either all zeros or all ones. + // Specifically ignore overflow/underflow as long as the leakage is + // limited to the lower bits. This is to remain compatible with + // other assemblers. + assert(isIntN(Size * 8 + 1, Value) && + "Value does not fit in the Fixup field"); + for (unsigned i = 0; i != Size; ++i) + Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); +} + +/// mayNeedRelaxation - Check whether the given instruction may need +/// relaxation. +/// \arg Inst - The instruction to test. +/// \arg Fixups - The actual fixups this instruction encoded to, for potential +/// use by the target backend. +bool HSAILAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { + return false; +} + +/// fixupNeedsRelaxation - Target specific predicate for whether a given +/// fixup requires the associated instruction to be relaxed. +bool HSAILAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + assert(!"When do we hit this?"); + return false; +} + +/// relaxInstruction - Relax the instruction in the given fragment to the next +/// wider instruction. +void HSAILAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { + assert(!"When do we hit this?"); +} + +/// writeNopData - Write an (optimal) nop sequence of Count bytes to the given +/// output. If the target cannot generate such a sequence, it should return an +/// error. +/// \return - True on success. +bool HSAILAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { + return true; +} + +MCAsmBackend *llvm::createHSAIL32AsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { + return new ELFHSAIL_32AsmBackend(T); +} + +MCAsmBackend *llvm::createHSAIL64AsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { + return new ELFHSAIL_64AsmBackend(T); +} Index: lib/Target/HSAIL/MCTargetDesc/HSAILELFObjectWriter.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILELFObjectWriter.h @@ -0,0 +1,28 @@ +//===-- HSAILELFObjectWriter.h - HSAIL ELF Object Writer --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef _HSAIL_ELF_OBJECT_WRITER_H_ +#define _HSAIL_ELF_OBJECT_WRITER_H_ + +#include "llvm/MC/MCELFObjectWriter.h" + +namespace llvm { +class HSAILELFObjectWriter : public MCELFObjectTargetWriter { +public: + HSAILELFObjectWriter(bool IsLargeModel, uint16_t EMachine, + bool HasRelocationAddend); + +protected: + unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel) const override { + return 0; // currently return 0, which means no relocation + } +}; +} + +#endif // _HSAIL_ELF_OBJECT_WRITER_H_ Index: lib/Target/HSAIL/MCTargetDesc/HSAILELFObjectWriter.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILELFObjectWriter.cpp @@ -0,0 +1,15 @@ +//===-- HSAILELFObjectWriter.cpp - HSAIL ELF Object Writer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILELFObjectWriter.h" +using namespace llvm; + +HSAILELFObjectWriter::HSAILELFObjectWriter(bool IsLargeModel, uint16_t EMachine, + bool HasRelocationAddend) + : MCELFObjectTargetWriter(IsLargeModel, 0, EMachine, HasRelocationAddend) {} Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCAsmInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- HSAILMCAsmInfo.h - HSAIL asm properties -----------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the HSAILMCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef _HSAIL_MC_ASM_INFO_H_ +#define _HSAIL_MC_ASM_INFO_H_ + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { +class Triple; + +struct HSAILELFMCAsmInfo : public MCAsmInfo { + explicit HSAILELFMCAsmInfo(StringRef &Triple); + const MCSection *getNonexecutableStackSection(MCContext &Ctx) const override; + + bool isValidUnquotedName(StringRef Name) const override; +}; +} // namespace llvm + +#endif // _HSAIL_MC_ASM_INFO_H_ Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCAsmInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCAsmInfo.cpp @@ -0,0 +1,92 @@ +//===-- HSAILMCAsmInfo.cpp - HSAIL asm properties -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HSAILMCAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "HSAILMCAsmInfo.h" +#include "HSAILTargetMachine.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" +using namespace llvm; + +HSAILELFMCAsmInfo::HSAILELFMCAsmInfo(StringRef &T) { + PrivateGlobalPrefix = "&"; + PrivateLabelPrefix = "@"; + GlobalDirective = "global"; + SupportsQuotedNames = false; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + + // We must set SupportsDebugInformation to true in order for debug info to + // be generated. This shouldn't cause unwanted output, because if the FE + // does not produce debug metadata (no -g option) then there won't be (much) + // debug info generated. + // TODO: we may need to especially ensure that when -g is not passed to the + // FE, + // BRIGAsmPrinter does not create large ".text", etc., sections in + // order to + // save space and I/O time. + // + + // FIXME: Setting SupportsDebugInformation to true causes an assertion + // failure in the AsmPrinter() destructor. + // Assertion `!DD && Handlers.empty() && "Debug/EH info didn't get finalized"' + // failed. + SupportsDebugInformation = false; + + PointerSize = Triple(T).getArch() == Triple::hsail64 ? 8 : 4; + + ExceptionsType = ExceptionHandling::None; + // DwarfRequiresFrameSection = false; + CommentString = "//"; + Data8bitsDirective = "sectiondata_b8\t"; + Data16bitsDirective = "sectiondata_b16\t"; + Data32bitsDirective = "sectiondata_b32\t"; + Data64bitsDirective = "sectiondata_b64\t"; +} + +const MCSection * +HSAILELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { + return nullptr; +} + +static bool isValidChar(char C) { + return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || + (C >= '0' && C <= '9') || C == '_' || C == '$' || C == '.' || C == '@'; +} + +static bool isValidFirstChar(char C) { + return isValidChar(C) && C != '.' && !(C >= '0' && C <= '9'); +} + +bool HSAILELFMCAsmInfo::isValidUnquotedName(StringRef Name) const { + char First = Name.front(); + assert((First == '%' || First == '&' || First == '@') && + "Missing valid prefix character"); + Name = Name.drop_front(1); + + if (!Name.empty()) { + if (!isValidFirstChar(Name.front())) + return false; + + Name = Name.drop_front(); + } + + for (char C : Name) { + if (!isValidChar(C)) + return false; + } + + return true; +} Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCCodeEmitter.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCCodeEmitter.h @@ -0,0 +1,71 @@ +//=== HSAILMCCodeEmitter.h - convert HSAIL code to machine code -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface to convert HSAIL code to machine code. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HSAILMCCODEEMITTER_H
+#define HSAILMCCODEEMITTER_H
+
+#define DEBUG_TYPE "mccodeemitter"
+#include "HSAIL.h"
+#include "HSAILSubtarget.h"
+#include "HSAILInstrInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+class HSAILMCCodeEmitter : public MCCodeEmitter {
+  HSAILMCCodeEmitter(const HSAILMCCodeEmitter &); // DO NOT IMPLEMENT
+  void operator=(const HSAILMCCodeEmitter &);     // DO NOT IMPLEMENT
+  const MCInstrInfo &MCII;
+  const MCRegisterInfo &MRI;
+  MCContext &Ctx;
+
+public:
+  HSAILMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+                     MCContext &ctx)
+      : MCII(mcii), MRI(mri), Ctx(ctx) {}
+
+  ~HSAILMCCodeEmitter() {}
+
+  void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const;
+
+  void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
+                    raw_ostream &OS) const;
+
+  void EmitImmediate(const MCOperand &Disp, unsigned ImmSize,
+                     MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
+                     SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
+
+  void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
+                        unsigned &CurByte, raw_ostream &OS) const;
+
+  void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
+                   unsigned &CurByte, raw_ostream &OS) const;
+
+  void EmitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField,
+                        uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
+                        SmallVectorImpl<MCFixup> &Fixups) const;
+
+  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                         SmallVectorImpl<MCFixup> &Fixups,
+                         const MCSubtargetInfo &STI) const override;
+  void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
+                                 int MemOperand, const MCInst &MI,
+                                 raw_ostream &OS) const;
+};
+
+} // end namespace llvm
+
+#endif
Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCCodeEmitter.cpp
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/MCTargetDesc/HSAILMCCodeEmitter.cpp
@@ -0,0 +1,104 @@
+//===-- HSAIL/HSAILMCCodeEmitter.cpp - Convert HSAIL code to machine code -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HSAILMCCodeEmitter class.
+// +//===----------------------------------------------------------------------===// + +#include "HSAILMCCodeEmitter.h" + +using namespace llvm; + +void HSAILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte, + raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, + unsigned &CurByte, + raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitRegModRMByte(const MCOperand &ModRMReg, + unsigned RegOpcodeFld, + unsigned &CurByte, + raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitSIBByte(unsigned SS, unsigned Index, unsigned Base, + unsigned &CurByte, raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +namespace { +class DummyMCCodeEmitter : public MCCodeEmitter { +private: + DummyMCCodeEmitter(const DummyMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const DummyMCCodeEmitter &); // DO NOT IMPLEMENT +protected: // Can only create subclasses. + DummyMCCodeEmitter(); + + const MCInstrInfo &m_ii; + const MCSubtargetInfo &m_sti; + MCContext &m_ctx; + +public: + DummyMCCodeEmitter(const MCInstrInfo &II, const MCSubtargetInfo &STI, + MCContext &Ctx) + : m_ii(II), m_sti(STI), m_ctx(Ctx) {} + DummyMCCodeEmitter(const MCInstrInfo &II, const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, MCContext &Ctx) + : m_ii(II), m_sti(STI), m_ctx(Ctx) {} + + ~DummyMCCodeEmitter() override {} + + void EncodeInstruction(const MCInst &Inst, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override { + llvm_unreachable("DummyMCCodeEmitter::EncodeInstruction called..."); + } +}; +} + +MCCodeEmitter *llvm::createHSAILMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new HSAILMCCodeEmitter(MCII, MRI, Ctx); +} + +void HSAILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned Size, + MCFixupKind FixupKind, unsigned &CurByte, + raw_ostream &OS, + SmallVectorImpl &Fixups, + int ImmOffset) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitMemModRMByte( + const MCInst &MI, unsigned Op, unsigned RegOpcodeField, uint64_t TSFlags, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl &Fixups) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, + unsigned &CurByte, + int MemOperand, + const MCInst &MI, + raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + llvm_unreachable("When do we hit this?"); +} Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCTargetDesc.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCTargetDesc.h @@ -0,0 +1,57 @@ +//===-- HSAILMCTargetDesc.h - HSAIL Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Provides HSAIL specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILMCTARGETDESC_H +#define LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILMCTARGETDESC_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class Target; +class raw_ostream; + +extern Target TheHSAIL_32Target, TheHSAIL_64Target; + +MCCodeEmitter *createHSAILMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); + +MCAsmBackend *createHSAIL32AsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU); + +MCAsmBackend *createHSAIL64AsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU); + +} // End llvm namespace + +#define GET_REGINFO_ENUM +#include "HSAILGenRegisterInfo.inc" + +#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_OPERAND_ENUM +#include "HSAILGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "HSAILGenSubtargetInfo.inc" + +#endif Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCTargetDesc.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCTargetDesc.cpp @@ -0,0 +1,128 @@ +//===-- HSAILMCTargetDesc.cpp - HSAIL Target Descriptions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This file provides HSAIL specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "HSAILMCTargetDesc.h" +#include "HSAILMCAsmInfo.h" +#include "HSAILMCCodeEmitter.h" +#include "InstPrinter/HSAILInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#include "HSAILTargetStreamer.h" + +#include "BRIGDwarfStreamer.h" +#include "RawVectorOstream.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "HSAILGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "HSAILGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "HSAILGenRegisterInfo.inc" + +// MC related code probably should be in MCTargetDesc subdir +static MCCodeGenInfo *createHSAILMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->InitMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstrInfo *createHSAILMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitHSAILMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createHSAILMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitHSAILMCRegisterInfo(X, 0); + return X; +} + +static MCSubtargetInfo *createHSAILMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo * X = new MCSubtargetInfo(); + InitHSAILMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +#if 1 +static MCStreamer *createBRIGStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll) { + // pass 0 instead of &_OS, if you do not want DWARF 
+static MCStreamer *createBRIGStreamer(const Triple &T, MCContext &Ctx,
+                                      MCAsmBackend &TAB,
+                                      raw_pwrite_stream &OS,
+                                      MCCodeEmitter *Emitter,
+                                      bool RelaxAll) {
+  // Pass nullptr instead of &OS if DWARF data should not be forwarded to the
+  // provided stream. This RawVectorOstream is deleted in the destructor of
+  // BRIGAsmPrinter.
+  RawVectorOstream *RVOS = new RawVectorOstream(&OS);
+
+  return createBRIGDwarfStreamer(Ctx, TAB, *RVOS, Emitter, RelaxAll);
+}
+
+MCTargetStreamer *
+createHSAILObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+  return new HSAILTargetStreamer(S);
+}
+
+static MCInstPrinter *createHSAILMCInstPrinter(const Triple &T,
+                                               unsigned SyntaxVariant,
+                                               const MCAsmInfo &MAI,
+                                               const MCInstrInfo &MII,
+                                               const MCRegisterInfo &MRI) {
+  return new HSAILInstPrinter(MAI, MII, MRI);
+}
+
+extern "C" void LLVMInitializeHSAILTargetMC() {
+  for (Target *T : { &TheHSAIL_32Target, &TheHSAIL_64Target }) {
+    RegisterMCAsmInfo<HSAILMCAsmInfo> X(*T);
+
+    TargetRegistry::RegisterMCCodeGenInfo(*T, createHSAILMCCodeGenInfo);
+    TargetRegistry::RegisterMCInstrInfo(*T, createHSAILMCInstrInfo);
+    TargetRegistry::RegisterMCRegInfo(*T, createHSAILMCRegisterInfo);
+    TargetRegistry::RegisterMCSubtargetInfo(*T, createHSAILMCSubtargetInfo);
+    TargetRegistry::RegisterMCInstPrinter(*T, createHSAILMCInstPrinter);
+    TargetRegistry::RegisterMCCodeEmitter(*T, createHSAILMCCodeEmitter);
+    TargetRegistry::RegisterELFStreamer(*T, createBRIGStreamer);
+    // TargetRegistry::RegisterObjectTargetStreamer(*T,
+    //                                              createHSAILObjectTargetStreamer);
+  }
+
+  TargetRegistry::RegisterMCAsmBackend(TheHSAIL_32Target,
+                                       createHSAIL32AsmBackend);
+  TargetRegistry::RegisterMCAsmBackend(TheHSAIL_64Target,
+                                       createHSAIL64AsmBackend);
+}
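The factories registered above are only reachable through the TargetRegistry. A minimal sketch of how a client might pull the HSAIL MC objects back out once LLVMInitializeHSAILTargetMC() has run (illustrative only, not part of this patch; the triple string, CPU and feature strings are placeholders):

  // Illustrative sketch (not part of the patch): querying the factories
  // registered by LLVMInitializeHSAILTargetMC() through the TargetRegistry.
  #include "llvm/MC/MCInstrInfo.h"
  #include "llvm/MC/MCRegisterInfo.h"
  #include "llvm/MC/MCSubtargetInfo.h"
  #include "llvm/Support/TargetRegistry.h"
  #include <memory>
  #include <string>

  bool buildHSAILMCObjects(const std::string &TripleName) {
    std::string Error;
    const llvm::Target *T =
        llvm::TargetRegistry::lookupTarget(TripleName, Error);
    if (!T)
      return false; // HSAIL targets not linked in or not initialized.

    std::unique_ptr<llvm::MCRegisterInfo> MRI(T->createMCRegInfo(TripleName));
    std::unique_ptr<llvm::MCInstrInfo> MII(T->createMCInstrInfo());
    std::unique_ptr<llvm::MCSubtargetInfo> STI(
        T->createMCSubtargetInfo(TripleName, /*CPU=*/"", /*Features=*/""));
    return MRI && MII && STI;
  }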
Index: lib/Target/HSAIL/MCTargetDesc/HSAILTargetStreamer.h
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/MCTargetDesc/HSAILTargetStreamer.h
@@ -0,0 +1,28 @@
+//===- HSAILTargetStreamer.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the HSAILTargetStreamer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class HSAILTargetStreamer : public MCTargetStreamer {
+public:
+  HSAILTargetStreamer(MCStreamer &S);
+  ~HSAILTargetStreamer();
+};
+}
+
+#endif
Index: lib/Target/HSAIL/MCTargetDesc/HSAILTargetStreamer.cpp
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/MCTargetDesc/HSAILTargetStreamer.cpp
@@ -0,0 +1,28 @@
+//===- HSAILTargetStreamer.cpp ----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HSAILTargetStreamer class.
+//
+//===----------------------------------------------------------------------===//
+#include "HSAILTargetStreamer.h"
+
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+
+HSAILTargetStreamer::HSAILTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+
+HSAILTargetStreamer::~HSAILTargetStreamer() {}
+
+class HSAILTargetAsmStreamer : public HSAILTargetStreamer {
+public:
+  HSAILTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+};
+
+HSAILTargetAsmStreamer::HSAILTargetAsmStreamer(MCStreamer &S,
+                                               formatted_raw_ostream &OS)
+    : HSAILTargetStreamer(S) {}
Index: lib/Target/HSAIL/MCTargetDesc/LLVMBuild.txt
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/HSAIL/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HSAILDesc
+parent = HSAIL
+required_libraries = MC HSAILInfo Support
+add_to_library_groups = HSAIL
Index: lib/Target/HSAIL/MCTargetDesc/RawVectorOstream.h
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/MCTargetDesc/RawVectorOstream.h
@@ -0,0 +1,58 @@
+//===-- RawVectorOstream.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Interface for RawVectorOstream, which is used to capture DWARF data
+/// emitted by the MC layer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef RAW_VECTOR_OSTREAM_H
+#define RAW_VECTOR_OSTREAM_H

+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+namespace llvm {
+
+class RawVectorOstream : public raw_pwrite_stream {
+  static const size_t AllocationChunk = 1024;
+  std::vector<char> Data;
+  std::vector<char>::size_type Position;
+  raw_pwrite_stream *Other;
+
+  void write_impl(const char *Ptr, size_t Size) override;
+  void pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) override;
+
+  uint64_t current_pos() const override;
+
+public:
+  explicit RawVectorOstream(raw_pwrite_stream *other);
+  virtual ~RawVectorOstream();
+
+  StringRef getData() const {
+    return StringRef(Data.data(), Data.size());
+  }
+
+  void releaseStream();
+
+  // The "other" stream is the stream to which all data written to this
+  // RawVectorOstream is forwarded. If it is null, RawVectorOstream does not
+  // forward captured data anywhere; it only stores the captured data in its
+  // internal memory buffer.
+  raw_ostream *getOtherStream() {
+    return Other;
+  }
+
+  void setOtherStream(raw_pwrite_stream *Other);
+};
+}
+
+#endif // RAW_VECTOR_OSTREAM_H
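A minimal usage sketch for the interface above (illustrative only, not part of this patch): everything written to a RawVectorOstream is captured in its internal buffer and, when a non-null stream was supplied, forwarded there byte for byte; releaseStream() must be called before destruction because the destructor asserts that no other stream is still attached.

  // Illustrative sketch (not part of the patch).
  #include "RawVectorOstream.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/raw_ostream.h"

  void captureAndForward(llvm::raw_pwrite_stream &RealOut) {
    llvm::RawVectorOstream RVOS(&RealOut); // Forward every write to RealOut.
    RVOS << "BRIG payload";                // Captured and forwarded.
    RVOS.flush();                          // Push buffered bytes into the vector.
    llvm::StringRef Captured = RVOS.getData(); // Inspect the captured copy.
    (void)Captured;
    RVOS.releaseStream(); // Detach RealOut; required before the destructor runs.
  }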
Index: lib/Target/HSAIL/MCTargetDesc/RawVectorOstream.cpp
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/MCTargetDesc/RawVectorOstream.cpp
@@ -0,0 +1,70 @@
+//===-- RawVectorOstream.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RawVectorOstream.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+
+using namespace llvm;
+
+void RawVectorOstream::write_impl(const char *Ptr, size_t Size) {
+  // Copy the data into the internal buffer, growing it in whole allocation
+  // chunks when required.
+  if (Position + Size > Data.size()) {
+    size_t nChunks = (Position + Size + AllocationChunk - 1) / AllocationChunk;
+    Data.resize(nChunks * AllocationChunk);
+  }
+
+  std::copy(Ptr, Ptr + Size, Data.begin() + Position);
+  Position += Size;
+  // Forward the data to the other stream, if one is attached.
+  if (Other) {
+    Other->write(Ptr, Size);
+  }
+}
+
+void RawVectorOstream::pwrite_impl(const char *Ptr, size_t Size,
+                                   uint64_t Offset) {
+  flush();
+  memcpy(Data.data() + Offset, Ptr, Size);
+
+  if (Other)
+    Other->pwrite(Ptr, Size, Offset);
+}
+
+uint64_t RawVectorOstream::current_pos() const {
+  return static_cast<uint64_t>(Position);
+}
+
+RawVectorOstream::RawVectorOstream(raw_pwrite_stream *other)
+    : Data(AllocationChunk), Position(0), Other(other) {}
+
+RawVectorOstream::~RawVectorOstream() {
+  // releaseStream() must have been called before a RawVectorOstream is
+  // destroyed.
+  assert(!Other);
+}
+
+void RawVectorOstream::releaseStream() {
+  if (Other) {
+    Other->flush();
+  }
+  Other = nullptr;
+}
+
+void RawVectorOstream::setOtherStream(raw_pwrite_stream *other) {
+  releaseStream();
+  Other = other;
+}
Index: lib/Target/HSAIL/TargetInfo/CMakeLists.txt
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/TargetInfo/CMakeLists.txt
@@ -0,0 +1,12 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMHSAILInfo
+  HSAILTargetInfo.cpp
+  )
+
+add_dependencies(LLVMHSAILInfo
+  LLVMMC
+  LLVMSupport
+  LLVMTarget
+  HSAILCommonTableGen
+  )
Index: lib/Target/HSAIL/TargetInfo/HSAILTargetInfo.cpp
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/TargetInfo/HSAILTargetInfo.cpp
@@ -0,0 +1,27 @@
+//===-- HSAILTargetInfo.cpp - HSAIL Target Implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HSAIL.h"
+
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+Target llvm::TheHSAIL_32Target;
+Target llvm::TheHSAIL_64Target;
+
+extern "C" void LLVMInitializeHSAILTargetInfo() {
+  RegisterTarget<Triple::hsail> Target32(
+      TheHSAIL_32Target, "hsail",
+      "32-bit HSAIL: small machine model, addresses are 32 bit");
+
+  RegisterTarget<Triple::hsail64> Target64(
+      TheHSAIL_64Target, "hsail64",
+      "64-bit HSAIL: large machine model, addresses are 64 bit");
+}
Index: lib/Target/HSAIL/TargetInfo/LLVMBuild.txt
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/HSAIL/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HSAILInfo
+parent = HSAIL
+required_libraries = MC Support Target
+add_to_library_groups = HSAIL
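For completeness, a sketch of how the registrations in this patch are typically consumed by a front end (illustrative only, not part of this patch; the initializers are normally reached through LLVM's generated target headers rather than declared by hand, and the triple strings are assumptions based on the names registered above):

  // Illustrative sketch (not part of the patch).
  #include "llvm/Support/TargetRegistry.h"
  #include <string>

  extern "C" void LLVMInitializeHSAILTargetInfo();
  extern "C" void LLVMInitializeHSAILTargetMC();

  const llvm::Target *initAndFindHSAIL(bool Is64Bit) {
    LLVMInitializeHSAILTargetInfo(); // Registers TheHSAIL_32Target/_64Target.
    LLVMInitializeHSAILTargetMC();   // Registers the MC-level factories.
    std::string Error;
    return llvm::TargetRegistry::lookupTarget(Is64Bit ? "hsail64" : "hsail",
                                              Error);
  }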