Index: lib/Target/AMDGPU/AMDGPULibCalls.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -131,6 +131,9 @@ // sin/cos bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA); + // __read_pipe/__write_pipe + bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo); + // Get insertion point at entry. BasicBlock::iterator getEntryIns(CallInst * UI); // Insert an Alloc instruction. @@ -458,11 +461,11 @@ } static inline int getVecSize(const AMDGPULibFunc& FInfo) { - return FInfo.Leads[0].VectorSize; + return FInfo.getLeads()[0].VectorSize; } static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { - return (AMDGPULibFunc::EType)FInfo.Leads[0].ArgType; + return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType; } Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) { @@ -507,8 +510,8 @@ Value *opr0 = aCI->getArgOperand(0); AMDGPULibFunc nf; - nf.Leads[0].ArgType = FInfo.Leads[0].ArgType; - nf.Leads[0].VectorSize = FInfo.Leads[0].VectorSize; + nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType; + nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize; nf.setPrefix(AMDGPULibFunc::NATIVE); nf.setId(AMDGPULibFunc::EI_SIN); @@ -537,11 +540,10 @@ Function *Callee = aCI->getCalledFunction(); FuncInfo FInfo; - if (!parseFunctionName(Callee->getName(), &FInfo) || + if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() || FInfo.getPrefix() != AMDGPULibFunc::NOPFX || - getArgType(FInfo) == AMDGPULibFunc::F64 || - !HasNative(FInfo.getId()) || - !(AllNative || useNativeFunc(FInfo.getName())) ) { + getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) || + !(AllNative || useNativeFunc(FInfo.getName()))) { return false; } @@ -559,6 +561,74 @@ return true; } +// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe +// builtin, with appended type size and 
alignment arguments, where 2 or 4 +// indicates the original number of arguments. The library has optimized version +// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same +// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N +// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ..., +// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4. +bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, + FuncInfo &FInfo) { + auto *Callee = CI->getCalledFunction(); + if (!Callee->isDeclaration()) + return false; + + assert(Callee->hasName() && "Invalid read_pipe/write_pipe function"); + auto *M = Callee->getParent(); + auto &Ctx = M->getContext(); + std::string Name = Callee->getName(); + auto NumArg = CI->getNumArgOperands(); + if (NumArg != 4 && NumArg != 6) + return false; + auto *PacketSize = CI->getArgOperand(NumArg - 2); + auto *PacketAlign = CI->getArgOperand(NumArg - 1); + if (!isa(PacketSize) || !isa(PacketAlign)) + return false; + unsigned Size = cast(PacketSize)->getZExtValue(); + unsigned Align = cast(PacketAlign)->getZExtValue(); + if (Size != Align || !isPowerOf2_32(Size)) + return false; + + Type *PtrElemTy; + if (Size <= 8) + PtrElemTy = Type::getIntNTy(Ctx, Size * 8); + else + PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8); + unsigned PtrArgLoc = CI->getNumArgOperands() - 3; + auto PtrArg = CI->getArgOperand(PtrArgLoc); + unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace(); + auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS); + + SmallVector ArgTys; + for (unsigned I = 0; I != PtrArgLoc; ++I) + ArgTys.push_back(CI->getArgOperand(I)->getType()); + ArgTys.push_back(PtrTy); + + Name = Name + "_" + std::to_string(Size); + + auto *FTy = FunctionType::get(Callee->getReturnType(), + ArrayRef(ArgTys), false); + auto *BCast = B.CreatePointerCast(PtrArg, PtrTy); + + SmallVector Args; + for (unsigned I = 0; I != PtrArgLoc; ++I) + 
Args.push_back(CI->getArgOperand(I)); + Args.push_back(BCast); + + AMDGPULibFunc NewLibFunc(Name, FTy); + auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); + if (!F) + return false; + auto *NCI = B.CreateCall(F, Args); + NCI->setAttributes(CI->getAttributes()); + CI->replaceAllUsesWith(NCI); + CI->dropAllReferences(); + CI->eraseFromParent(); + + return true; +} + // This function returns false if no change; return true otherwise. bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { this->CI = CI; @@ -636,6 +706,11 @@ return fold_sincos(CI, B, AA); break; + case AMDGPULibFunc::EI_READ_PIPE_2: + case AMDGPULibFunc::EI_READ_PIPE_4: + case AMDGPULibFunc::EI_WRITE_PIPE_2: + case AMDGPULibFunc::EI_WRITE_PIPE_4: + return fold_read_write_pipe(CI, B, FInfo); default: break; @@ -1259,7 +1334,7 @@ // for OpenCL 2.0 we have only generic implementation of sincos // function. AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); - nf.Leads[0].PtrKind = AMDGPULibFunc::GENERIC; + nf.getLeads()[0].PtrKind = AMDGPULibFunc::GENERIC; Function *Fsincos = dyn_cast_or_null(getFunction(M, nf)); if (!Fsincos) return false; Index: lib/Target/AMDGPU/AMDGPULibFunc.h =================================================================== --- lib/Target/AMDGPU/AMDGPULibFunc.h +++ lib/Target/AMDGPU/AMDGPULibFunc.h @@ -18,7 +18,7 @@ class Function; class Module; -class AMDGPULibFunc { +class AMDGPULibFuncBase { public: enum EFuncId { EI_NONE, @@ -26,6 +26,14 @@ // IMPORTANT: enums below should go in ascending by 1 value order // because they are used as indexes in the mangling rules table. // don't use explicit value assignment. + // + // There are two types of library functions: those with mangled + // name and those with unmangled name. The enums for the library + // functions with mangled name are defined before enums for the + // library functions with unmangled name. The enum for the last + // library function with mangled name is EI_LAST_MANGLED. 
+ // + // Library functions with mangled name. EI_ABS, EI_ABS_DIFF, EI_ACOS, @@ -144,7 +152,6 @@ EI_POWR, EI_PREFETCH, EI_RADIANS, - EI_READ_PIPE, EI_RECIP, EI_REMAINDER, EI_REMQUO, @@ -212,7 +219,6 @@ EI_WRITE_IMAGEF, EI_WRITE_IMAGEI, EI_WRITE_IMAGEUI, - EI_WRITE_PIPE, EI_NCOS, EI_NEXP2, EI_NFMA, @@ -225,6 +231,14 @@ EI_FLDEXP, EI_CLASS, EI_RCBRT, + EI_LAST_MANGLED = + EI_RCBRT, /* The last library function with mangled name */ + + // Library functions with unmangled name. + EI_READ_PIPE_2, + EI_READ_PIPE_4, + EI_WRITE_PIPE_2, + EI_WRITE_PIPE_4, EX_INTRINSICS_COUNT }; @@ -298,51 +312,144 @@ template void mangleItanium(Stream& os); }; + static bool isMangled(EFuncId Id) { + return static_cast(Id) <= static_cast(EI_LAST_MANGLED); + } +}; +class AMDGPULibFuncImpl : public AMDGPULibFuncBase { public: - static bool parse(StringRef mangledName, AMDGPULibFunc &iInfo); - - AMDGPULibFunc(); - AMDGPULibFunc(EFuncId id, const AMDGPULibFunc& copyFrom); + AMDGPULibFuncImpl() {} + virtual ~AMDGPULibFuncImpl() {} - ENamePrefix getPrefix() const { return FKind; } - EFuncId getId() const { return FuncId; } + /// Get unmangled name for mangled library function and name for unmangled + /// library function. + virtual std::string getName() const = 0; + virtual unsigned getNumArgs() const = 0; + EFuncId getId() const { return FuncId; } + ENamePrefix getPrefix() const { return FKind; } - std::string getName() const; - unsigned getNumArgs() const; + bool isMangled() const { return AMDGPULibFuncBase::isMangled(FuncId); } - FunctionType* getFunctionType(Module& M) const; + void setId(EFuncId id) { FuncId = id; } + virtual bool parseFuncName(StringRef &mangledName) = 0; - std::string mangle() const; + /// \return The mangled function name for mangled library functions + /// and unmangled function name for unmangled library functions. 
+ virtual std::string mangle() const = 0; + void setName(StringRef N) { Name = N; } void setPrefix(ENamePrefix pfx) { FKind = pfx; } - void setId(EFuncId id) { FuncId = id; } - - static Function* getFunction(llvm::Module *M, const AMDGPULibFunc& fInfo); - static Function* getOrInsertFunction(llvm::Module *M, - const AMDGPULibFunc& fInfo); + virtual FunctionType *getFunctionType(Module &M) const = 0; - static StringRef getUnmangledName(const StringRef& mangledName); +protected: + EFuncId FuncId; + std::string Name; + ENamePrefix FKind; +}; - Param Leads[2]; +/// Wrapper class for AMDGPULibFuncImpl +class AMDGPULibFunc : public AMDGPULibFuncBase { +public: + explicit AMDGPULibFunc() : Impl(std::unique_ptr()) {} + AMDGPULibFunc(const AMDGPULibFunc &F); + /// Clone a mangled library func with the Id \p Id and argument info from \p + /// CopyFrom. + explicit AMDGPULibFunc(EFuncId Id, const AMDGPULibFunc &CopyFrom); + /// Construct an unmangled library function on the fly. + explicit AMDGPULibFunc(StringRef FName, FunctionType *FT); + + AMDGPULibFunc &operator=(const AMDGPULibFunc &F); + + /// Get unmangled name for mangled library function and name for unmangled + /// library function. + std::string getName() const { return Impl->getName(); } + unsigned getNumArgs() const { return Impl->getNumArgs(); } + EFuncId getId() const { return Impl->getId(); } + ENamePrefix getPrefix() const { return Impl->getPrefix(); } + /// Get leading parameters for mangled lib functions. + Param *getLeads(); + const Param *getLeads() const; + + bool isMangled() const { return Impl->isMangled(); } + void setId(EFuncId Id) { Impl->setId(Id); } + bool parseFuncName(StringRef &MangledName) { + return Impl->parseFuncName(MangledName); + } + + /// \return The mangled function name for mangled library functions + /// and unmangled function name for unmangled library functions.
+ std::string mangle() const { return Impl->mangle(); } + + void setName(StringRef N) { Impl->setName(N); } + void setPrefix(ENamePrefix PFX) { Impl->setPrefix(PFX); } + + FunctionType *getFunctionType(Module &M) const { + return Impl->getFunctionType(M); + } + static Function *getFunction(llvm::Module *M, const AMDGPULibFunc &fInfo); + + static Function *getOrInsertFunction(llvm::Module *M, + const AMDGPULibFunc &fInfo); + static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr); private: - EFuncId FuncId; - ENamePrefix FKind; - std::string Name; + /// Initialize as a mangled library function. + void initMangled(); + std::unique_ptr Impl; +}; + +class AMDGPUMangledLibFunc : public AMDGPULibFuncImpl { +public: + Param Leads[2]; + + explicit AMDGPUMangledLibFunc(); + explicit AMDGPUMangledLibFunc(EFuncId id, + const AMDGPUMangledLibFunc &copyFrom); - void reset(); + std::string getName() const override; + unsigned getNumArgs() const override; + FunctionType *getFunctionType(Module &M) const override; + static StringRef getUnmangledName(StringRef MangledName); - std::string mangleNameItanium() const; - bool parseItanuimName(StringRef& mangledName); + bool parseFuncName(StringRef &mangledName) override; - std::string mangleName(const StringRef& name) const; - bool parseName(const StringRef& mangledName); + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const AMDGPULibFuncImpl *F) { return F->isMangled(); } - template - void writeName(Stream& OS) const; + std::string mangle() const override; + +private: + std::string mangleNameItanium() const; + + std::string mangleName(StringRef Name) const; + bool parseUnmangledName(StringRef MangledName); + + template void writeName(Stream &OS) const; }; +class AMDGPUUnmangledLibFunc : public AMDGPULibFuncImpl { + FunctionType *FuncTy; + +public: + explicit AMDGPUUnmangledLibFunc(); + explicit AMDGPUUnmangledLibFunc(StringRef FName, FunctionType *FT) { + Name = FName; + FuncTy = FT; + } +
std::string getName() const override { return Name; } + unsigned getNumArgs() const override; + FunctionType *getFunctionType(Module &M) const override { return FuncTy; } + + bool parseFuncName(StringRef &Name) override; + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const AMDGPULibFuncImpl *F) { return !F->isMangled(); } + + std::string mangle() const override { return Name; } + + void setFunctionType(FunctionType *FT) { FuncTy = FT; } +}; } #endif // _AMDGPU_LIBFUNC_H_ Index: lib/Target/AMDGPU/AMDGPULibFunc.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULibFunc.cpp +++ lib/Target/AMDGPU/AMDGPULibFunc.cpp @@ -65,6 +65,51 @@ unsigned getNumArgs() const; }; +// Information about library functions with unmangled names. +class UnmangledFuncInfo { + StringRef const Name; + unsigned NumArgs; + + // Table for all lib functions with unmangled names. + static const UnmangledFuncInfo Table[]; + + // Number of entries in Table. + static const unsigned TableSize; + + // Map function name to index. + class NameMap : public StringMap { + public: + NameMap() { + for (unsigned I = 0; I != TableSize; ++I) + (*this)[Table[I].Name] = I; + } + }; + friend class NameMap; + static NameMap Map; + +public: + using ID = AMDGPULibFunc::EFuncId; + UnmangledFuncInfo() = default; + UnmangledFuncInfo(StringRef _Name, unsigned _NumArgs) + : Name(_Name), NumArgs(_NumArgs) {} + // Get index to Table by function name. 
+ static bool lookup(StringRef Name, ID &Id); + static unsigned toIndex(ID Id) { + assert(static_cast(Id) > + static_cast(AMDGPULibFunc::EI_LAST_MANGLED) && + "Invalid unmangled library function"); + return static_cast(Id) - 1 - + static_cast(AMDGPULibFunc::EI_LAST_MANGLED); + } + static ID toFuncId(unsigned Index) { + assert(Index < TableSize && "Invalid unmangled library function"); + return static_cast( + Index + 1 + static_cast(AMDGPULibFunc::EI_LAST_MANGLED)); + } + static unsigned getNumArgs(ID Id) { return Table[toIndex(Id)].NumArgs; } + static StringRef getName(ID Id) { return Table[toIndex(Id)].Name; } +}; + unsigned ManglingRule::getNumArgs() const { unsigned I=0; while (I < (sizeof Param/sizeof Param[0]) && Param[I]) ++I; @@ -215,7 +260,6 @@ { "powr" , {1}, {E_ANY,E_COPY}}, { "prefetch" , {1}, {E_CONSTPTR_ANY,EX_SIZET}}, { "radians" , {1}, {E_ANY}}, -{ "read_pipe" , {4}, {E_COPY,EX_RESERVEDID,EX_UINT,E_ANY}}, { "recip" , {1}, {E_ANY}}, { "remainder" , {1}, {E_ANY,E_COPY}}, { "remquo" , {1,3}, {E_ANY,E_COPY,E_ANY}}, @@ -283,7 +327,6 @@ { "write_imagef" , {1}, {E_ANY,E_IMAGECOORDS,EX_FLOAT4}}, { "write_imagei" , {1}, {E_ANY,E_IMAGECOORDS,EX_INTV4}}, { "write_imageui" , {1}, {E_ANY,E_IMAGECOORDS,EX_UINTV4}}, -{ "write_pipe" , {4}, {E_COPY,EX_RESERVEDID,EX_UINT,E_ANY}}, { "ncos" , {1}, {E_ANY} }, { "nexp2" , {1}, {E_ANY} }, { "nfma" , {1}, {E_ANY, E_COPY, E_COPY} }, @@ -298,6 +341,19 @@ { "rcbrt" , {1}, {E_ANY} }, }; +// Library functions with unmangled name. 
+const UnmangledFuncInfo UnmangledFuncInfo::Table[] = { + {"__read_pipe_2", 4}, + {"__read_pipe_4", 6}, + {"__write_pipe_2", 4}, + {"__write_pipe_4", 6}, +}; + +const unsigned UnmangledFuncInfo::TableSize = + sizeof(UnmangledFuncInfo::Table) / sizeof(UnmangledFuncInfo::Table[0]); + +UnmangledFuncInfo::NameMap UnmangledFuncInfo::Map; + static const struct ManglingRulesMap : public StringMap { ManglingRulesMap() : StringMap(sizeof(manglingRules)/sizeof(manglingRules[0])) { @@ -461,18 +517,7 @@ } // end anonymous namespace -AMDGPULibFunc::AMDGPULibFunc() { - reset(); -} - -AMDGPULibFunc::AMDGPULibFunc(EFuncId id, const AMDGPULibFunc& copyFrom) - : FuncId(id) { - FKind = copyFrom.FKind; - Leads[0] = copyFrom.Leads[0]; - Leads[1] = copyFrom.Leads[1]; -} - -void AMDGPULibFunc::reset() { +AMDGPUMangledLibFunc::AMDGPUMangledLibFunc() { FuncId = EI_NONE; FKind = NOPFX; Leads[0].reset(); @@ -480,6 +525,19 @@ Name.clear(); } +AMDGPUUnmangledLibFunc::AMDGPUUnmangledLibFunc() { + FuncId = EI_NONE; + FuncTy = nullptr; +} + +AMDGPUMangledLibFunc::AMDGPUMangledLibFunc( + EFuncId id, const AMDGPUMangledLibFunc &copyFrom) { + FuncId = id; + FKind = copyFrom.FKind; + Leads[0] = copyFrom.Leads[0]; + Leads[1] = copyFrom.Leads[1]; +} + /////////////////////////////////////////////////////////////////////////////// // Demangling @@ -508,8 +566,8 @@ return Pfx; } -bool AMDGPULibFunc::parseName(const StringRef& fullName) { - FuncId = static_cast(manglingRulesMap.lookup(fullName)); +bool AMDGPUMangledLibFunc::parseUnmangledName(StringRef FullName) { + FuncId = static_cast(manglingRulesMap.lookup(FullName)); return FuncId != EI_NONE; } @@ -601,10 +659,11 @@ return true; } -bool AMDGPULibFunc::parseItanuimName(StringRef& mangledName) { +bool AMDGPUMangledLibFunc::parseFuncName(StringRef &mangledName) { StringRef Name = eatLengthPrefixedName(mangledName); FKind = parseNamePrefix(Name); - if (!parseName(Name)) return false; + if (!parseUnmangledName(Name)) + return false; const ManglingRule& Rule = 
manglingRules[FuncId]; ItaniumParamParser Parser; @@ -619,30 +678,42 @@ return true; } -bool AMDGPULibFunc::parse(StringRef mangledName, AMDGPULibFunc& iInfo) { - iInfo.reset(); - if (mangledName.empty()) +bool AMDGPUUnmangledLibFunc::parseFuncName(StringRef &Name) { + if (!UnmangledFuncInfo::lookup(Name, FuncId)) return false; + setName(Name); + return true; +} - if (eatTerm(mangledName, "_Z")) { - return iInfo.parseItanuimName(mangledName); +bool AMDGPULibFunc::parse(StringRef FuncName, AMDGPULibFunc &F) { + if (FuncName.empty()) { + F.Impl = std::unique_ptr(); + return false; } + + if (eatTerm(FuncName, "_Z")) + F.Impl = make_unique(); + else + F.Impl = make_unique(); + if (F.Impl->parseFuncName(FuncName)) + return true; + + F.Impl = std::unique_ptr(); return false; } -StringRef AMDGPULibFunc::getUnmangledName(const StringRef& mangledName) { +StringRef AMDGPUMangledLibFunc::getUnmangledName(StringRef mangledName) { StringRef S = mangledName; if (eatTerm(S, "_Z")) return eatLengthPrefixedName(S); return StringRef(); } - /////////////////////////////////////////////////////////////////////////////// // Mangling template -void AMDGPULibFunc::writeName(Stream& OS) const { +void AMDGPUMangledLibFunc::writeName(Stream &OS) const { const char *Pfx = ""; switch (FKind) { case NATIVE: Pfx = "native_"; break; @@ -658,9 +729,7 @@ } } -std::string AMDGPULibFunc::mangle() const { - return mangleNameItanium(); -} +std::string AMDGPUMangledLibFunc::mangle() const { return mangleNameItanium(); } /////////////////////////////////////////////////////////////////////////////// // Itanium Mangling @@ -788,7 +857,7 @@ }; } // namespace -std::string AMDGPULibFunc::mangleNameItanium() const { +std::string AMDGPUMangledLibFunc::mangleNameItanium() const { SmallString<128> Buf; raw_svector_ostream S(Buf); SmallString<128> NameBuf; @@ -850,7 +919,7 @@ return T; } -FunctionType* AMDGPULibFunc::getFunctionType(Module& M) const { +FunctionType *AMDGPUMangledLibFunc::getFunctionType(Module 
&M) const { LLVMContext& C = M.getContext(); std::vector Args; ParamIterator I(Leads, manglingRules[FuncId]); @@ -863,18 +932,22 @@ Args, false); } -unsigned AMDGPULibFunc::getNumArgs() const { +unsigned AMDGPUMangledLibFunc::getNumArgs() const { return manglingRules[FuncId].getNumArgs(); } -std::string AMDGPULibFunc::getName() const { +unsigned AMDGPUUnmangledLibFunc::getNumArgs() const { + return UnmangledFuncInfo::getNumArgs(FuncId); +} + +std::string AMDGPUMangledLibFunc::getName() const { SmallString<128> Buf; raw_svector_ostream OS(Buf); writeName(OS); return OS.str(); } -Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc& fInfo) { +Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc &fInfo) { std::string FuncName = fInfo.mangle(); Function *F = dyn_cast_or_null( M->getValueSymbolTable().lookup(FuncName)); @@ -889,7 +962,7 @@ } Function *AMDGPULibFunc::getOrInsertFunction(Module *M, - const AMDGPULibFunc& fInfo) { + const AMDGPULibFunc &fInfo) { std::string const FuncName = fInfo.mangle(); Function *F = dyn_cast_or_null( M->getValueSymbolTable().lookup(FuncName)); @@ -929,3 +1002,52 @@ return cast(C); } + +bool UnmangledFuncInfo::lookup(StringRef Name, ID &Id) { + auto Loc = Map.find(Name); + if (Loc != Map.end()) { + Id = toFuncId(Loc->second); + return true; + } + Id = AMDGPULibFunc::EI_NONE; + return false; +} + +AMDGPULibFunc::AMDGPULibFunc(const AMDGPULibFunc &F) { + if (auto *MF = dyn_cast(F.Impl.get())) + Impl.reset(new AMDGPUMangledLibFunc(*MF)); + else if (auto *UMF = dyn_cast(F.Impl.get())) + Impl.reset(new AMDGPUUnmangledLibFunc(*UMF)); + else + Impl = std::unique_ptr(); +} + +AMDGPULibFunc &AMDGPULibFunc::operator=(const AMDGPULibFunc &F) { + if (this == &F) + return *this; + new (this) AMDGPULibFunc(F); + return *this; +} + +AMDGPULibFunc::AMDGPULibFunc(EFuncId Id, const AMDGPULibFunc &CopyFrom) { + assert(AMDGPULibFuncBase::isMangled(Id) && CopyFrom.isMangled() && + "not supported"); + Impl.reset(new 
AMDGPUMangledLibFunc( + Id, *cast(CopyFrom.Impl.get()))); +} + +AMDGPULibFunc::AMDGPULibFunc(StringRef Name, FunctionType *FT) { + Impl.reset(new AMDGPUUnmangledLibFunc(Name, FT)); +} + +void AMDGPULibFunc::initMangled() { Impl.reset(new AMDGPUMangledLibFunc()); } + +AMDGPULibFunc::Param *AMDGPULibFunc::getLeads() { + if (!Impl) + initMangled(); + return cast(Impl.get())->Leads; +} + +const AMDGPULibFunc::Param *AMDGPULibFunc::getLeads() const { + return cast(Impl.get())->Leads; +} Index: test/CodeGen/AMDGPU/simplify-libcalls.ll =================================================================== --- test/CodeGen/AMDGPU/simplify-libcalls.ll +++ test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -1,6 +1,6 @@ -; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-POSTLINK %s -; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-PRELINK %s -; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-NATIVE %s +; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -instnamer <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-POSTLINK %s +; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink -instnamer <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-PRELINK %s +; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink -instnamer <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-NATIVE %s ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos ; GCN-POSTLINK: tail call fast float @_Z3sinf( @@ -299,8 +299,8 @@ ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %0 = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %0, %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 +; GCN: %__powprod3 = 
fmul fast float %[[r0]], %__powx22 define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -314,8 +314,8 @@ ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %0 = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %0, %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 +; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -331,8 +331,8 @@ ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %0 = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %0, %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 +; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -350,12 +350,12 @@ ; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs) ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03 ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) -; GCN-PRELINK: %0 = bitcast float %tmp to i32 -; GCN-PRELINK: %__pow_sign = and i32 %0, -2147483648 -; GCN-PRELINK: %1 = bitcast float %__exp2 to i32 -; GCN-PRELINK: %2 = or i32 %__pow_sign, %1 -; GCN-PRELINK: %3 = bitcast float addrspace(1)* %a to i32 addrspace(1)* -; GCN-PRELINK: store i32 %2, i32 addrspace(1)* %3, align 4 +; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 +; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648 +; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 +; 
GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] +; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* +; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 @@ -393,12 +393,12 @@ ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: %__yeven = shl i32 %conv, 31 -; GCN-PRELINK: %0 = bitcast float %tmp to i32 -; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %0 -; GCN-PRELINK: %1 = bitcast float %__exp2 to i32 -; GCN-PRELINK: %2 = or i32 %__pow_sign, %1 -; GCN-PRELINK: %3 = bitcast float addrspace(1)* %a to i32 addrspace(1)* -; GCN-PRELINK: store i32 %2, i32 addrspace(1)* %3, align 4 +; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 +; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]] +; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 +; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] +; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* +; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 @@ -692,3 +692,96 @@ } declare float @_Z6sincosfPU3AS4f(float, float addrspace(4)*) + +%opencl.pipe_t = type opaque +%opencl.reserve_id_t = type opaque + +; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) +; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND:[0-9]+]] +; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 2, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* 
%p, i32 addrspace(1)* %ptr) local_unnamed_addr { +entry: + %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* + %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8 addrspace(4)* + %tmp2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8 addrspace(4)* %tmp1, i32 4, i32 4) #0 + %tmp3 = tail call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) + %tmp4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %tmp3, i32 2, i8 addrspace(4)* %tmp1, i32 4, i32 4) #0 + tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %tmp3, i32 4, i32 4) + ret void +} + +declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8 addrspace(4)*, i32, i32) + +declare %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) + +declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i8 addrspace(4)*, i32, i32) + +declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i32) + +; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) +; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 2, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr { +entry: + %tmp = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* + %tmp1 = addrspacecast i8 addrspace(1)* %tmp to i8 addrspace(4)* + %tmp2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8 addrspace(4)* %tmp1, i32 4, i32 4) #0 + %tmp3 = tail call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0 + %tmp4 = tail 
call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %tmp3, i32 2, i8 addrspace(4)* %tmp1, i32 4, i32 4) #0 + tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %tmp3, i32 4, i32 4) #0 + ret void +} + +declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8 addrspace(4)*, i32, i32) local_unnamed_addr + +declare %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr + +declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i8 addrspace(4)*, i32, i32) local_unnamed_addr + +declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i32) local_unnamed_addr + +%struct.S = type { [100 x i32] } + +; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size +; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}} i8 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}} i16 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}} i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}} i64 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64> addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64> addrspace(4)* %{{.*}} #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64> addrspace(4)* %{{.*}} #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64> addrspace(4)* %{{.*}} #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8 addrspace(4)* %{{.*}} i32 400, i32 4) #[[NOUNWIND]] +define amdgpu_kernel void 
@test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 { +entry: + %tmp = addrspacecast i8 addrspace(1)* %ptr1 to i8 addrspace(4)* + %tmp1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(4)* %tmp, i32 1, i32 1) #0 + %tmp2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)* + %tmp3 = addrspacecast i8 addrspace(1)* %tmp2 to i8 addrspace(4)* + %tmp4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8 addrspace(4)* %tmp3, i32 2, i32 2) #0 + %tmp5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)* + %tmp6 = addrspacecast i8 addrspace(1)* %tmp5 to i8 addrspace(4)* + %tmp7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8 addrspace(4)* %tmp6, i32 4, i32 4) #0 + %tmp8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)* + %tmp9 = addrspacecast i8 addrspace(1)* %tmp8 to i8 addrspace(4)* + %tmp10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8 addrspace(4)* %tmp9, i32 8, i32 8) #0 + %tmp11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)* + %tmp12 = addrspacecast i8 addrspace(1)* %tmp11 to i8 addrspace(4)* + %tmp13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8 addrspace(4)* %tmp12, i32 16, i32 16) #0 + %tmp14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)* + %tmp15 = addrspacecast i8 addrspace(1)* %tmp14 to i8 addrspace(4)* + %tmp16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, 
i8 addrspace(4)* %tmp15, i32 32, i32 32) #0 + %tmp17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)* + %tmp18 = addrspacecast i8 addrspace(1)* %tmp17 to i8 addrspace(4)* + %tmp19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8 addrspace(4)* %tmp18, i32 64, i32 64) #0 + %tmp20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)* + %tmp21 = addrspacecast i8 addrspace(1)* %tmp20 to i8 addrspace(4)* + %tmp22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8 addrspace(4)* %tmp21, i32 128, i32 128) #0 + %tmp23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)* + %tmp24 = addrspacecast i8 addrspace(1)* %tmp23 to i8 addrspace(4)* + %tmp25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8 addrspace(4)* %tmp24, i32 400, i32 4) #0 + ret void +} + +; GCN-PRELINK: attributes #[[NOUNWIND]] = { nounwind } +attributes #0 = { nounwind }