Index: lib/Target/AMDGPU/AMDGPULibCalls.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -64,6 +64,8 @@ private: typedef llvm::AMDGPULibFunc FuncInfo; + typedef llvm::AMDGPUMangledLibFunc MangledFuncInfo; + typedef llvm::AMDGPUUnmangledLibFunc UnmangledFuncInfo; // -fuse-native. bool AllNative = false; @@ -72,71 +74,81 @@ // Return a pointer (pointer expr) to the function if function defintion with // "FuncName" exists. It may create a new function prototype in pre-link mode. - Constant *getFunction(Module *M, const FuncInfo& fInfo); + Constant *getFunction(Module *M, const MangledFuncInfo &fInfo); // Replace a normal function with its native version. - bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo); + bool replaceWithNative(CallInst *CI, const MangledFuncInfo &FInfo); - bool parseFunctionName(const StringRef& FMangledName, - FuncInfo *FInfo=nullptr /*out*/); + std::unique_ptr parseFunctionName(const StringRef &Name); - bool TDOFold(CallInst *CI, const FuncInfo &FInfo); + bool TDOFold(CallInst *CI, const MangledFuncInfo &FInfo); /* Specialized optimizations */ // recip (half or native) - bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_recip(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // divide (half or native) - bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_divide(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // pow/powr/pown - bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_pow(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // rootn - bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_rootn(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // fma/mad - bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_fma_mad(CallInst *CI, 
IRBuilder<> &B, const MangledFuncInfo &FInfo); // -fuse-native for sincos - bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo); + bool sincosUseNative(CallInst *aCI, const MangledFuncInfo &FInfo); // evaluate calls if calls' arguments are constants. - bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0, - double& Res1, Constant *copr0, Constant *copr1, Constant *copr2); - bool evaluateCall(CallInst *aCI, FuncInfo &FInfo); + bool evaluateScalarMathFunc(MangledFuncInfo &FInfo, double &Res0, + double &Res1, Constant *copr0, Constant *copr1, + Constant *copr2); + bool evaluateCall(CallInst *aCI, MangledFuncInfo &FInfo); // exp - bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_exp(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // exp2 - bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_exp2(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // exp10 - bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_exp10(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // log - bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_log(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // log2 - bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_log2(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // log10 - bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_log10(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // sqrt - bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo); + bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo); // sin/cos bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA); + // __read_pipe/__write_pipe + bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, + UnmangledFuncInfo &FInfo); + // Get insertion point at entry. 
BasicBlock::iterator getEntryIns(CallInst * UI); // Insert an Alloc instruction. AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix); // Get a scalar native builtin signle argument FP function - Constant* getNativeFunction(Module* M, const FuncInfo &FInfo); + Constant *getNativeFunction(Module *M, const MangledFuncInfo &FInfo); + // Fold library function with mangled name. + bool foldMangledFunction(CallInst *CI, MangledFuncInfo &Info, IRBuilder<> &B, + AliasAnalysis *AA = nullptr); + // Fold library function with unmangled name. + bool foldUnmangledFunction(CallInst *CI, UnmangledFuncInfo &Info, + IRBuilder<> &B, AliasAnalysis *AA = nullptr); protected: CallInst *CI; @@ -457,25 +469,26 @@ return TableRef(); } -static inline int getVecSize(const AMDGPULibFunc& FInfo) { +static inline int getVecSize(const AMDGPUMangledLibFunc &FInfo) { return FInfo.Leads[0].VectorSize; } -static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { +static inline AMDGPULibFunc::EType +getArgType(const AMDGPUMangledLibFunc &FInfo) { return (AMDGPULibFunc::EType)FInfo.Leads[0].ArgType; } -Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) { +Constant *AMDGPULibCalls::getFunction(Module *M, const MangledFuncInfo &fInfo) { // If we are doing PreLinkOpt, the function is external. So it is safe to // use getOrInsertFunction() at this stage. - return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo) - : AMDGPULibFunc::getFunction(M, fInfo); + return EnablePreLink ? 
AMDGPUMangledLibFunc::getOrInsertFunction(M, fInfo) + : AMDGPUMangledLibFunc::getFunction(M, fInfo); } -bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName, - FuncInfo *FInfo) { - return AMDGPULibFunc::parse(FMangledName, *FInfo); +std::unique_ptr +AMDGPULibCalls::parseFunctionName(const StringRef &Name) { + return AMDGPULibFunc::parse(Name); } bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const { @@ -498,7 +511,8 @@ UseNative.begin()->empty()); } -bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) { +bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, + const MangledFuncInfo &FInfo) { bool native_sin = useNativeFunc("sin"); bool native_cos = useNativeFunc("cos"); @@ -506,7 +520,7 @@ Module *M = aCI->getModule(); Value *opr0 = aCI->getArgOperand(0); - AMDGPULibFunc nf; + AMDGPUMangledLibFunc nf; nf.Leads[0].ArgType = FInfo.Leads[0].ArgType; nf.Leads[0].VectorSize = FInfo.Leads[0].VectorSize; @@ -536,20 +550,23 @@ CI = aCI; Function *Callee = aCI->getCalledFunction(); - FuncInfo FInfo; - if (!parseFunctionName(Callee->getName(), &FInfo) || - FInfo.getPrefix() != AMDGPULibFunc::NOPFX || - getArgType(FInfo) == AMDGPULibFunc::F64 || - !HasNative(FInfo.getId()) || - !(AllNative || useNativeFunc(FInfo.getName())) ) { + auto PInfo = parseFunctionName(Callee->getName()); + auto *FInfo = dyn_cast_or_null(PInfo.get()); + + if (!FInfo) + return false; + + if (FInfo->getPrefix() != AMDGPULibFunc::NOPFX || + getArgType(*FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo->getId()) || + !(AllNative || useNativeFunc(FInfo->getUnmangledName()))) { return false; } - if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS) - return sincosUseNative(aCI, FInfo); + if (FInfo->getId() == AMDGPULibFunc::EI_SINCOS) + return sincosUseNative(aCI, *FInfo); - FInfo.setPrefix(AMDGPULibFunc::NATIVE); - Constant *F = getFunction(aCI->getModule(), FInfo); + FInfo->setPrefix(AMDGPULibFunc::NATIVE); + Constant *F = getFunction(aCI->getModule(), *FInfo); if (!F) 
return false; @@ -559,6 +576,73 @@ return true; } +// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe +// builtin, with appended type size and alignment arguments, where 2 or 4 +// indicates the original number of arguments. The library has optimized version +// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same +// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N +// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ..., +// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4. +bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, + UnmangledFuncInfo &FInfo) { + auto *Callee = CI->getCalledFunction(); + if (!Callee->isDeclaration()) + return false; + + assert(Callee->hasName() && "Invalid read_pipe/write_pipe function"); + auto *M = Callee->getParent(); + auto &Ctx = M->getContext(); + std::string Name = Callee->getName(); + auto NumArg = CI->getNumArgOperands(); + if (NumArg != 4 && NumArg != 6) + return false; + auto *PacketSize = CI->getArgOperand(NumArg - 2); + auto *PacketAlign = CI->getArgOperand(NumArg - 1); + if (!isa(PacketSize) || !isa(PacketAlign)) + return false; + unsigned Size = cast(PacketSize)->getZExtValue(); + unsigned Align = cast(PacketAlign)->getZExtValue(); + if (Size != Align || !isPowerOf2_32(Size)) + return false; + + Type *PtrElemTy; + if (Size <= 8) + PtrElemTy = Type::getIntNTy(Ctx, Size * 8); + else + PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8); + unsigned PtrArgLoc = CI->getNumArgOperands() - 3; + auto PtrArg = CI->getArgOperand(PtrArgLoc); + unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace(); + auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS); + + SmallVector ArgTys; + for (unsigned I = 0; I != PtrArgLoc; ++I) + ArgTys.push_back(CI->getArgOperand(I)->getType()); + ArgTys.push_back(PtrTy); + + Name = Name + "_" + std::to_string(Size); + + auto *FTy = 
FunctionType::get(Callee->getReturnType(), + ArrayRef(ArgTys), false); + auto *BCast = B.CreatePointerCast(PtrArg, PtrTy); + + SmallVector Args; + for (unsigned I = 0; I != PtrArgLoc; ++I) + Args.push_back(CI->getArgOperand(I)); + Args.push_back(BCast); + + FInfo.setName(Name); + FInfo.setFunctionType(FTy); + auto *F = AMDGPULibFunc::getOrInsertFunction(M, FInfo); + auto *NCI = B.CreateCall(F, Args); + NCI->setAttributes(CI->getAttributes()); + CI->replaceAllUsesWith(NCI); + CI->dropAllReferences(); + CI->eraseFromParent(); + + return true; +} + // This function returns false if no change; return true otherwise. bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { this->CI = CI; @@ -567,10 +651,11 @@ // Ignore indirect calls. if (Callee == 0) return false; - FuncInfo FInfo; - if (!parseFunctionName(Callee->getName(), &FInfo)) + auto PFInfo = parseFunctionName(Callee->getName()); + if (!PFInfo) return false; + auto &FInfo = *PFInfo; // Further check the number of arguments to see if they match. 
if (CI->getNumArgOperands() != FInfo.getNumArgs()) return false; @@ -586,6 +671,15 @@ if (const FPMathOperator *FPOp = dyn_cast(CI)) B.setFastMathFlags(FPOp->getFastMathFlags()); + if (auto *Mangled = dyn_cast(&FInfo)) + return foldMangledFunction(CI, *Mangled, B, AA); + + auto *Unmangled = cast(&FInfo); + return foldUnmangledFunction(CI, *Unmangled, B, AA); +} + +bool AMDGPULibCalls::foldMangledFunction(CallInst *CI, MangledFuncInfo &FInfo, + IRBuilder<> &B, AliasAnalysis *AA) { if (TDOFold(CI, FInfo)) return true; @@ -636,6 +730,22 @@ return fold_sincos(CI, B, AA); break; + default: + break; + } + + return false; +} + +bool AMDGPULibCalls::foldUnmangledFunction(CallInst *CI, + UnmangledFuncInfo &FInfo, + IRBuilder<> &B, AliasAnalysis *AA) { + switch (FInfo.getId()) { + case AMDGPULibFunc::EI_READ_PIPE_2: + case AMDGPULibFunc::EI_READ_PIPE_4: + case AMDGPULibFunc::EI_WRITE_PIPE_2: + case AMDGPULibFunc::EI_WRITE_PIPE_4: + return fold_read_write_pipe(CI, B, FInfo); default: break; @@ -644,7 +754,7 @@ return false; } -bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { +bool AMDGPULibCalls::TDOFold(CallInst *CI, const MangledFuncInfo &FInfo) { // Table-Driven optimization const TableRef tr = getOptTable(FInfo.getId()); if (tr.size==0) @@ -710,14 +820,15 @@ return false; } -bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) { +bool AMDGPULibCalls::replaceWithNative(CallInst *CI, + const MangledFuncInfo &FInfo) { Module *M = CI->getModule(); if (getArgType(FInfo) != AMDGPULibFunc::F32 || FInfo.getPrefix() != AMDGPULibFunc::NOPFX || !HasNative(FInfo.getId())) return false; - AMDGPULibFunc nf = FInfo; + AMDGPUMangledLibFunc nf = FInfo; nf.setPrefix(AMDGPULibFunc::NATIVE); if (Constant *FPExpr = getFunction(M, nf)) { DEBUG(dbgs() << "AMDIC: " << *CI << " ---> "); @@ -733,7 +844,7 @@ // [native_]half_recip(c) ==> 1.0/c bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B, - const FuncInfo &FInfo) { + const MangledFuncInfo 
&FInfo) { Value *opr0 = CI->getArgOperand(0); if (ConstantFP *CF = dyn_cast(opr0)) { // Just create a normal div. Later, InstCombine will be able @@ -752,7 +863,7 @@ // [native_]half_divide(x, c) ==> x/c bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B, - const FuncInfo &FInfo) { + const MangledFuncInfo &FInfo) { Value *opr0 = CI->getArgOperand(0); Value *opr1 = CI->getArgOperand(1); ConstantFP *CF0 = dyn_cast(opr0); @@ -782,7 +893,7 @@ } bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, - const FuncInfo &FInfo) { + const MangledFuncInfo &FInfo) { assert((FInfo.getId() == AMDGPULibFunc::EI_POW || FInfo.getId() == AMDGPULibFunc::EI_POWR || FInfo.getId() == AMDGPULibFunc::EI_POWN) && @@ -861,11 +972,13 @@ if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) { // pow[r](x, [-]0.5) = sqrt(x) bool issqrt = CF->isExactlyValue(0.5); - if (Constant *FPExpr = getFunction(M, - AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT - : AMDGPULibFunc::EI_RSQRT, FInfo))) { + if (Constant *FPExpr = getFunction( + M, AMDGPUMangledLibFunc(issqrt ? AMDGPULibFunc::EI_SQRT + : AMDGPULibFunc::EI_RSQRT, + FInfo))) { DEBUG(errs() << "AMDIC: " << *CI << " ---> " - << FInfo.getName().c_str() << "(" << *opr0 << ")\n"); + << FInfo.getUnmangledName().c_str() << "(" << *opr0 + << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? 
"__pow2sqrt" : "__pow2rsqrt"); replaceCall(nval); @@ -929,8 +1042,8 @@ // powr ---> exp2(y * log2(x)) // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31)) - Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, - FInfo)); + Constant *ExpExpr = + getFunction(M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_EXP2, FInfo)); if (!ExpExpr) return false; @@ -1016,8 +1129,8 @@ Value *nval; if (needabs) { - Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, - FInfo)); + Constant *AbsExpr = + getFunction(M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_FABS, FInfo)); if (!AbsExpr) return false; nval = CreateCallEx(B, AbsExpr, opr0, "__fabs"); @@ -1025,8 +1138,8 @@ nval = cnval ? cnval : opr0; } if (needlog) { - Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, - FInfo)); + Constant *LogExpr = + getFunction(M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_LOG2, FInfo)); if (!LogExpr) return false; nval = CreateCallEx(B,LogExpr, nval, "__log2"); @@ -1067,7 +1180,7 @@ } bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, - const FuncInfo &FInfo) { + const MangledFuncInfo &FInfo) { Value *opr0 = CI->getArgOperand(0); Value *opr1 = CI->getArgOperand(1); @@ -1086,8 +1199,8 @@ std::vector ParamsTys; ParamsTys.push_back(opr0->getType()); Module *M = CI->getModule(); - if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, - FInfo))) { + if (Constant *FPExpr = getFunction( + M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt"); replaceCall(nval); @@ -1095,8 +1208,8 @@ } } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) Module *M = CI->getModule(); - if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, - FInfo))) { + if (Constant *FPExpr = getFunction( + M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { DEBUG(errs() << "AMDIC: " << *CI << " 
---> cbrt(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt"); replaceCall(nval); @@ -1113,8 +1226,8 @@ std::vector ParamsTys; ParamsTys.push_back(opr0->getType()); Module *M = CI->getModule(); - if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, - FInfo))) { + if (Constant *FPExpr = getFunction( + M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) { DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt"); replaceCall(nval); @@ -1125,7 +1238,7 @@ } bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B, - const FuncInfo &FInfo) { + const MangledFuncInfo &FInfo) { Value *opr0 = CI->getArgOperand(0); Value *opr1 = CI->getArgOperand(1); Value *opr2 = CI->getArgOperand(2); @@ -1169,21 +1282,23 @@ } // Get a scalar native builtin signle argument FP function -Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) { +Constant *AMDGPULibCalls::getNativeFunction(Module *M, + const MangledFuncInfo &FInfo) { if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId())) return nullptr; - FuncInfo nf = FInfo; + MangledFuncInfo nf = FInfo; nf.setPrefix(AMDGPULibFunc::NATIVE); return getFunction(M, nf); } // fold sqrt -> native_sqrt (x) bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B, - const FuncInfo &FInfo) { + const MangledFuncInfo &FInfo) { if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) && (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) { if (Constant *FPExpr = getNativeFunction( - CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { + CI->getModule(), + AMDGPUMangledLibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { Value *opr0 = CI->getArgOperand(0); DEBUG(errs() << "AMDIC: " << *CI << " ---> " << "sqrt(" << *opr0 << ")\n"); @@ -1198,10 +1313,12 @@ // fold sin, cos -> sincos. 
bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, AliasAnalysis *AA) { - AMDGPULibFunc fInfo; - if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo)) + auto Info = AMDGPULibFunc::parse(CI->getCalledFunction()->getName()); + AMDGPUMangledLibFunc *pInfo = cast(Info.get()); + if (!pInfo) return false; + AMDGPUMangledLibFunc &fInfo = *pInfo; assert(fInfo.getId() == AMDGPULibFunc::EI_SIN || fInfo.getId() == AMDGPULibFunc::EI_COS); bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN; @@ -1258,7 +1375,7 @@ // for OpenCL 2.0 we have only generic implementation of sincos // function. - AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); + AMDGPUMangledLibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); nf.Leads[0].PtrKind = AMDGPULibFunc::GENERIC; Function *Fsincos = dyn_cast_or_null(getFunction(M, nf)); if (!Fsincos) return false; @@ -1320,8 +1437,8 @@ return Alloc; } -bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo, - double& Res0, double& Res1, +bool AMDGPULibCalls::evaluateScalarMathFunc(MangledFuncInfo &FInfo, + double &Res0, double &Res1, Constant *copr0, Constant *copr1, Constant *copr2) { // By default, opr0/opr1/opr3 holds values of float/double type. @@ -1515,7 +1632,7 @@ return false; } -bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) { +bool AMDGPULibCalls::evaluateCall(CallInst *aCI, MangledFuncInfo &FInfo) { int numArgs = (int)aCI->getNumArgOperands(); if (numArgs > 3) return false; Index: lib/Target/AMDGPU/AMDGPULibFunc.h =================================================================== --- lib/Target/AMDGPU/AMDGPULibFunc.h +++ lib/Target/AMDGPU/AMDGPULibFunc.h @@ -26,6 +26,14 @@ // IMPORTANT: enums below should go in ascending by 1 value order // because they are used as indexes in the mangling rules table. // don't use explicit value assignment. + // + // There are two types of library functions: those with mangled + // name and those with unmangled name. 
The enums for the library + // functions with mangled name are defined before enums for the + // library functions with unmangled name. The enum for the last + // library function with mangled name is EI_LAST_MANGLED. + // + // Library functions with mangled name. EI_ABS, EI_ABS_DIFF, EI_ACOS, @@ -144,7 +152,6 @@ EI_POWR, EI_PREFETCH, EI_RADIANS, - EI_READ_PIPE, EI_RECIP, EI_REMAINDER, EI_REMQUO, @@ -212,7 +219,6 @@ EI_WRITE_IMAGEF, EI_WRITE_IMAGEI, EI_WRITE_IMAGEUI, - EI_WRITE_PIPE, EI_NCOS, EI_NEXP2, EI_NFMA, @@ -225,6 +231,14 @@ EI_FLDEXP, EI_CLASS, EI_RCBRT, + EI_LAST_MANGLED = + EI_RCBRT, /* The last library function with mangled name */ + + // Library functions with unmangled name. + EI_READ_PIPE_2, + EI_READ_PIPE_4, + EI_WRITE_PIPE_2, + EI_WRITE_PIPE_4, EX_INTRINSICS_COUNT }; @@ -300,49 +314,85 @@ }; public: - static bool parse(StringRef mangledName, AMDGPULibFunc &iInfo); - - AMDGPULibFunc(); - AMDGPULibFunc(EFuncId id, const AMDGPULibFunc& copyFrom); + static std::unique_ptr parse(StringRef mangledName); - ENamePrefix getPrefix() const { return FKind; } - EFuncId getId() const { return FuncId; } + explicit AMDGPULibFunc() {} + virtual ~AMDGPULibFunc() {} - std::string getName() const; - unsigned getNumArgs() const; + virtual unsigned getNumArgs() const = 0; - FunctionType* getFunctionType(Module& M) const; + EFuncId getId() const { return FuncId; } - std::string mangle() const; + bool isMangled() const { + return static_cast(FuncId) <= + static_cast(EI_LAST_MANGLED); + } - void setPrefix(ENamePrefix pfx) { FKind = pfx; } void setId(EFuncId id) { FuncId = id; } + virtual bool parseFuncName(StringRef &mangledName) = 0; - static Function* getFunction(llvm::Module *M, const AMDGPULibFunc& fInfo); + /// \return The mangled function name for mangled library functions + /// and unmangled function name for unmangled library functions. 
+ virtual std::string mangle() const = 0; - static Function* getOrInsertFunction(llvm::Module *M, - const AMDGPULibFunc& fInfo); + void setName(StringRef N) { Name = N; } - static StringRef getUnmangledName(const StringRef& mangledName); + virtual FunctionType *getFunctionType(Module &M) const = 0; + static Function *getFunction(llvm::Module *M, const AMDGPULibFunc &fInfo); - Param Leads[2]; + static Function *getOrInsertFunction(llvm::Module *M, + const AMDGPULibFunc &fInfo); -private: +protected: EFuncId FuncId; - ENamePrefix FKind; - std::string Name; + std::string Name; +}; - void reset(); +class AMDGPUMangledLibFunc : public AMDGPULibFunc { +public: + Param Leads[2]; + + explicit AMDGPUMangledLibFunc(); + explicit AMDGPUMangledLibFunc(EFuncId id, + const AMDGPUMangledLibFunc ©From); + unsigned getNumArgs() const override; + bool parseFuncName(StringRef &mangledName) override; + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const AMDGPULibFunc *F) { return F->isMangled(); } - std::string mangleNameItanium() const; - bool parseItanuimName(StringRef& mangledName); + std::string getUnmangledName() const; + FunctionType *getFunctionType(Module &M) const override; + + ENamePrefix getPrefix() const { return FKind; } + void setPrefix(ENamePrefix pfx) { FKind = pfx; } - std::string mangleName(const StringRef& name) const; - bool parseName(const StringRef& mangledName); + static StringRef getUnmangledName(StringRef MangledName); - template - void writeName(Stream& OS) const; + std::string mangle() const override; + +private: + ENamePrefix FKind; + + std::string mangleNameItanium() const; + + std::string mangleName(StringRef Name) const; + bool parseUnmangledName(StringRef MangledName); + + template void writeName(Stream &OS) const; }; +class AMDGPUUnmangledLibFunc : public AMDGPULibFunc { + FunctionType *FuncTy; + +public: + explicit AMDGPUUnmangledLibFunc(); + unsigned getNumArgs() const override; + // Methods for support 
type inquiry through isa, cast, and dyn_cast: + static bool classof(const AMDGPULibFunc *F) { return !F->isMangled(); } + bool parseFuncName(StringRef &Name) override; + std::string mangle() const override { return Name; } + void setFunctionType(FunctionType *FT) { FuncTy = FT; } + FunctionType *getFunctionType(Module &M) const override { return FuncTy; } +}; } #endif // _AMDGPU_LIBFUNC_H_ Index: lib/Target/AMDGPU/AMDGPULibFunc.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULibFunc.cpp +++ lib/Target/AMDGPU/AMDGPULibFunc.cpp @@ -65,6 +65,51 @@ unsigned getNumArgs() const; }; +// Information about library functions with unmangled names. +class UnmangledFuncInfo { + StringRef const Name; + unsigned NumArgs; + + // Table for all lib functions with unmangled names. + static const UnmangledFuncInfo Table[]; + + // Number of entries in Table. + static const unsigned TableSize; + + // Map function name to index. + class NameMap : public StringMap { + public: + NameMap() { + for (unsigned I = 0; I != TableSize; ++I) + (*this)[Table[I].Name] = I; + } + }; + friend class NameMap; + static NameMap Map; + +public: + using ID = AMDGPULibFunc::EFuncId; + UnmangledFuncInfo() = default; + UnmangledFuncInfo(StringRef _Name, unsigned _NumArgs) + : Name(_Name), NumArgs(_NumArgs) {} + // Get index to Table by function name. 
+ static bool lookup(StringRef Name, ID &Id); + static unsigned toIndex(ID Id) { + assert(static_cast(Id) > + static_cast(AMDGPULibFunc::EI_LAST_MANGLED) && + "Invalid unmangled library function"); + return static_cast(Id) - 1 - + static_cast(AMDGPULibFunc::EI_LAST_MANGLED); + } + static ID toFuncId(unsigned Index) { + assert(Index < TableSize && "Invalid unmangled library function"); + return static_cast( + Index + 1 + static_cast(AMDGPULibFunc::EI_LAST_MANGLED)); + } + static unsigned getNumArgs(ID Id) { return Table[toIndex(Id)].NumArgs; } + static StringRef getName(ID Id) { return Table[toIndex(Id)].Name; } +}; + unsigned ManglingRule::getNumArgs() const { unsigned I=0; while (I < (sizeof Param/sizeof Param[0]) && Param[I]) ++I; @@ -215,7 +260,6 @@ { "powr" , {1}, {E_ANY,E_COPY}}, { "prefetch" , {1}, {E_CONSTPTR_ANY,EX_SIZET}}, { "radians" , {1}, {E_ANY}}, -{ "read_pipe" , {4}, {E_COPY,EX_RESERVEDID,EX_UINT,E_ANY}}, { "recip" , {1}, {E_ANY}}, { "remainder" , {1}, {E_ANY,E_COPY}}, { "remquo" , {1,3}, {E_ANY,E_COPY,E_ANY}}, @@ -283,7 +327,6 @@ { "write_imagef" , {1}, {E_ANY,E_IMAGECOORDS,EX_FLOAT4}}, { "write_imagei" , {1}, {E_ANY,E_IMAGECOORDS,EX_INTV4}}, { "write_imageui" , {1}, {E_ANY,E_IMAGECOORDS,EX_UINTV4}}, -{ "write_pipe" , {4}, {E_COPY,EX_RESERVEDID,EX_UINT,E_ANY}}, { "ncos" , {1}, {E_ANY} }, { "nexp2" , {1}, {E_ANY} }, { "nfma" , {1}, {E_ANY, E_COPY, E_COPY} }, @@ -298,6 +341,19 @@ { "rcbrt" , {1}, {E_ANY} }, }; +// Library functions with unmangled name. 
+const UnmangledFuncInfo UnmangledFuncInfo::Table[] = { + {"__read_pipe_2", 4}, + {"__read_pipe_4", 6}, + {"__write_pipe_2", 4}, + {"__write_pipe_4", 6}, +}; + +const unsigned UnmangledFuncInfo::TableSize = + sizeof(UnmangledFuncInfo::Table) / sizeof(UnmangledFuncInfo::Table[0]); + +UnmangledFuncInfo::NameMap UnmangledFuncInfo::Map; + static const struct ManglingRulesMap : public StringMap { ManglingRulesMap() : StringMap(sizeof(manglingRules)/sizeof(manglingRules[0])) { @@ -461,18 +517,7 @@ } // end anonymous namespace -AMDGPULibFunc::AMDGPULibFunc() { - reset(); -} - -AMDGPULibFunc::AMDGPULibFunc(EFuncId id, const AMDGPULibFunc& copyFrom) - : FuncId(id) { - FKind = copyFrom.FKind; - Leads[0] = copyFrom.Leads[0]; - Leads[1] = copyFrom.Leads[1]; -} - -void AMDGPULibFunc::reset() { +AMDGPUMangledLibFunc::AMDGPUMangledLibFunc() { FuncId = EI_NONE; FKind = NOPFX; Leads[0].reset(); @@ -480,6 +525,16 @@ Name.clear(); } +AMDGPUUnmangledLibFunc::AMDGPUUnmangledLibFunc() { FuncId = EI_NONE; } + +AMDGPUMangledLibFunc::AMDGPUMangledLibFunc( + EFuncId id, const AMDGPUMangledLibFunc ©From) { + FuncId = id; + FKind = copyFrom.FKind; + Leads[0] = copyFrom.Leads[0]; + Leads[1] = copyFrom.Leads[1]; +} + /////////////////////////////////////////////////////////////////////////////// // Demangling @@ -508,8 +563,8 @@ return Pfx; } -bool AMDGPULibFunc::parseName(const StringRef& fullName) { - FuncId = static_cast(manglingRulesMap.lookup(fullName)); +bool AMDGPUMangledLibFunc::parseUnmangledName(StringRef FullName) { + FuncId = static_cast(manglingRulesMap.lookup(FullName)); return FuncId != EI_NONE; } @@ -601,10 +656,11 @@ return true; } -bool AMDGPULibFunc::parseItanuimName(StringRef& mangledName) { +bool AMDGPUMangledLibFunc::parseFuncName(StringRef &mangledName) { StringRef Name = eatLengthPrefixedName(mangledName); FKind = parseNamePrefix(Name); - if (!parseName(Name)) return false; + if (!parseUnmangledName(Name)) + return false; const ManglingRule& Rule = manglingRules[FuncId]; 
ItaniumParamParser Parser; @@ -619,30 +675,40 @@ return true; } -bool AMDGPULibFunc::parse(StringRef mangledName, AMDGPULibFunc& iInfo) { - iInfo.reset(); - if (mangledName.empty()) +bool AMDGPUUnmangledLibFunc::parseFuncName(StringRef &Name) { + if (!UnmangledFuncInfo::lookup(Name, FuncId)) return false; + setName(Name); + return true; +} - if (eatTerm(mangledName, "_Z")) { - return iInfo.parseItanuimName(mangledName); - } - return false; +std::unique_ptr AMDGPULibFunc::parse(StringRef FuncName) { + if (FuncName.empty()) + return std::unique_ptr(); + + std::unique_ptr LibF; + if (eatTerm(FuncName, "_Z")) + LibF = make_unique(); + else + LibF = make_unique(); + if (LibF->parseFuncName(FuncName)) + return LibF; + + return std::unique_ptr(); } -StringRef AMDGPULibFunc::getUnmangledName(const StringRef& mangledName) { +StringRef AMDGPUMangledLibFunc::getUnmangledName(StringRef mangledName) { StringRef S = mangledName; if (eatTerm(S, "_Z")) return eatLengthPrefixedName(S); return StringRef(); } - /////////////////////////////////////////////////////////////////////////////// // Mangling template -void AMDGPULibFunc::writeName(Stream& OS) const { +void AMDGPUMangledLibFunc::writeName(Stream &OS) const { const char *Pfx = ""; switch (FKind) { case NATIVE: Pfx = "native_"; break; @@ -658,9 +724,7 @@ } } -std::string AMDGPULibFunc::mangle() const { - return mangleNameItanium(); -} +std::string AMDGPUMangledLibFunc::mangle() const { return mangleNameItanium(); } /////////////////////////////////////////////////////////////////////////////// // Itanium Mangling @@ -788,7 +852,7 @@ }; } // namespace -std::string AMDGPULibFunc::mangleNameItanium() const { +std::string AMDGPUMangledLibFunc::mangleNameItanium() const { SmallString<128> Buf; raw_svector_ostream S(Buf); SmallString<128> NameBuf; @@ -850,7 +914,7 @@ return T; } -FunctionType* AMDGPULibFunc::getFunctionType(Module& M) const { +FunctionType *AMDGPUMangledLibFunc::getFunctionType(Module &M) const { LLVMContext& C = 
M.getContext(); std::vector Args; ParamIterator I(Leads, manglingRules[FuncId]); @@ -863,18 +927,22 @@ Args, false); } -unsigned AMDGPULibFunc::getNumArgs() const { +unsigned AMDGPUMangledLibFunc::getNumArgs() const { return manglingRules[FuncId].getNumArgs(); } -std::string AMDGPULibFunc::getName() const { +unsigned AMDGPUUnmangledLibFunc::getNumArgs() const { + return UnmangledFuncInfo::getNumArgs(FuncId); +} + +std::string AMDGPUMangledLibFunc::getUnmangledName() const { SmallString<128> Buf; raw_svector_ostream OS(Buf); writeName(OS); return OS.str(); } -Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc& fInfo) { +Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc &fInfo) { std::string FuncName = fInfo.mangle(); Function *F = dyn_cast_or_null( M->getValueSymbolTable().lookup(FuncName)); @@ -889,7 +957,7 @@ } Function *AMDGPULibFunc::getOrInsertFunction(Module *M, - const AMDGPULibFunc& fInfo) { + const AMDGPULibFunc &fInfo) { std::string const FuncName = fInfo.mangle(); Function *F = dyn_cast_or_null( M->getValueSymbolTable().lookup(FuncName)); @@ -929,3 +997,13 @@ return cast(C); } + +bool UnmangledFuncInfo::lookup(StringRef Name, ID &Id) { + auto Loc = Map.find(Name); + if (Loc != Map.end()) { + Id = toFuncId(Loc->second); + return true; + } + Id = AMDGPULibFunc::EI_NONE; + return false; +} Index: test/CodeGen/AMDGPU/simplify-libcalls.ll =================================================================== --- test/CodeGen/AMDGPU/simplify-libcalls.ll +++ test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -1,6 +1,6 @@ -; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-POSTLINK %s -; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-PRELINK %s -; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-NATIVE %s +; RUN: opt -S -O1 
-mtriple=amdgcn-- -amdgpu-simplify-libcall <%s | opt -instnamer -S | FileCheck -check-prefix=GCN -check-prefix=GCN-POSTLINK %s +; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | opt -instnamer -S | FileCheck -check-prefix=GCN -check-prefix=GCN-PRELINK %s +; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink <%s | opt -instnamer -S | FileCheck -check-prefix=GCN -check-prefix=GCN-NATIVE %s ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos ; GCN-POSTLINK: tail call fast float @_Z3sinf( @@ -299,8 +299,8 @@ ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %0 = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %0, %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 +; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -314,8 +314,8 @@ ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %0 = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %0, %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 +; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -331,8 +331,8 @@ ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %0 = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %0, %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 +; GCN: %__powprod3 = fmul fast float 
%[[r0]], %__powx22 define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -350,12 +350,12 @@ ; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs) ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03 ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) -; GCN-PRELINK: %0 = bitcast float %tmp to i32 -; GCN-PRELINK: %__pow_sign = and i32 %0, -2147483648 -; GCN-PRELINK: %1 = bitcast float %__exp2 to i32 -; GCN-PRELINK: %2 = or i32 %__pow_sign, %1 -; GCN-PRELINK: %3 = bitcast float addrspace(1)* %a to i32 addrspace(1)* -; GCN-PRELINK: store i32 %2, i32 addrspace(1)* %3, align 4 +; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 +; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648 +; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 +; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] +; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* +; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 @@ -393,12 +393,12 @@ ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx) ; GCN-PRELINK: %__yeven = shl i32 %conv, 31 -; GCN-PRELINK: %0 = bitcast float %tmp to i32 -; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %0 -; GCN-PRELINK: %1 = bitcast float %__exp2 to i32 -; GCN-PRELINK: %2 = or i32 %__pow_sign, %1 -; GCN-PRELINK: %3 = bitcast float addrspace(1)* %a to i32 addrspace(1)* -; GCN-PRELINK: store i32 %2, i32 addrspace(1)* %3, align 4 +; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 +; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]] +; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 +; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] +; 
GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* +; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) { entry: %tmp = load float, float addrspace(1)* %a, align 4 @@ -692,3 +692,96 @@ } declare float @_Z6sincosfPU3AS4f(float, float addrspace(4)*) + +%opencl.pipe_t = type opaque +%opencl.reserve_id_t = type opaque + +; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) +; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND:[0-9]+]] +; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 2, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr { +entry: + %0 = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* + %1 = addrspacecast i8 addrspace(1)* %0 to i8 addrspace(4)* + %2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8 addrspace(4)* %1, i32 4, i32 4) #0 + %3 = tail call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) + %4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %3, i32 2, i8 addrspace(4)* %1, i32 4, i32 4) #0 + tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %3, i32 4, i32 4) + ret void +} + +declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8 addrspace(4)*, i32, i32) + +declare %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) + +declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i8 addrspace(4)*, i32, i32) + +declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i32) + +; GCN-LABEL: 
{{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) +; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 2, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr { +entry: + %0 = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)* + %1 = addrspacecast i8 addrspace(1)* %0 to i8 addrspace(4)* + %2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8 addrspace(4)* %1, i32 4, i32 4) #0 + %3 = tail call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0 + %4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %3, i32 2, i8 addrspace(4)* %1, i32 4, i32 4) #0 + tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %3, i32 4, i32 4) #0 + ret void +} + +declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8 addrspace(4)*, i32, i32) local_unnamed_addr + +declare %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr + +declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i8 addrspace(4)*, i32, i32) local_unnamed_addr + +declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i32) local_unnamed_addr + +%struct.S = type { [100 x i32] } + +; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size +; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}}, i8 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i16 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32
@__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}}, i64 addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64> addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64> addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64> addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64> addrspace(4)* %{{.*}}) #[[NOUNWIND]] +; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8 addrspace(4)* %{{.*}}, i32 400, i32 4) #[[NOUNWIND]] +define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 { +entry: + %0 = addrspacecast i8 addrspace(1)* %ptr1 to i8 addrspace(4)* + %1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(4)* %0, i32 1, i32 1) #0 + %2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)* + %3 = addrspacecast i8 addrspace(1)* %2 to i8 addrspace(4)* + %4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8 addrspace(4)* %3, i32 2, i32 2) #0 + %5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)* + %6 = addrspacecast i8
addrspace(1)* %5 to i8 addrspace(4)* + %7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8 addrspace(4)* %6, i32 4, i32 4) #0 + %8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)* + %9 = addrspacecast i8 addrspace(1)* %8 to i8 addrspace(4)* + %10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8 addrspace(4)* %9, i32 8, i32 8) #0 + %11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)* + %12 = addrspacecast i8 addrspace(1)* %11 to i8 addrspace(4)* + %13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8 addrspace(4)* %12, i32 16, i32 16) #0 + %14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)* + %15 = addrspacecast i8 addrspace(1)* %14 to i8 addrspace(4)* + %16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8 addrspace(4)* %15, i32 32, i32 32) #0 + %17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)* + %18 = addrspacecast i8 addrspace(1)* %17 to i8 addrspace(4)* + %19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8 addrspace(4)* %18, i32 64, i32 64) #0 + %20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)* + %21 = addrspacecast i8 addrspace(1)* %20 to i8 addrspace(4)* + %22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8 addrspace(4)* %21, i32 128, i32 128) #0 + %23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)* + %24 = addrspacecast i8 addrspace(1)* %23 to i8 addrspace(4)* + %25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8 addrspace(4)* %24, i32 400, i32 4) #0 + ret void +} + +; GCN-PRELINK: attributes #[[NOUNWIND]] = { nounwind } +attributes #0 = { nounwind }