Index: lib/Target/AMDGPU/AMDGPULibCalls.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -64,6 +64,8 @@
 private:
 
   typedef llvm::AMDGPULibFunc FuncInfo;
+  typedef llvm::AMDGPUMangledLibFunc MangledFuncInfo;
+  typedef llvm::AMDGPUUnmangledLibFunc UnmangledFuncInfo;
 
   // -fuse-native.
   bool AllNative = false;
@@ -72,71 +74,81 @@
 
   // Return a pointer (pointer expr) to the function if function defintion with
   // "FuncName" exists. It may create a new function prototype in pre-link mode.
-  Constant *getFunction(Module *M, const FuncInfo& fInfo);
+  Constant *getFunction(Module *M, const MangledFuncInfo &fInfo);
 
   // Replace a normal function with its native version.
-  bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
+  bool replaceWithNative(CallInst *CI, const MangledFuncInfo &FInfo);
 
-  bool parseFunctionName(const StringRef& FMangledName,
-                         FuncInfo *FInfo=nullptr /*out*/);
+  std::unique_ptr<FuncInfo> parseFunctionName(const StringRef &Name);
 
-  bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
+  bool TDOFold(CallInst *CI, const MangledFuncInfo &FInfo);
 
   /* Specialized optimizations */
 
   // recip (half or native)
-  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_recip(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // divide (half or native)
-  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_divide(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // pow/powr/pown
-  bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_pow(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // rootn
-  bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_rootn(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // fma/mad
-  bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // -fuse-native for sincos
-  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
+  bool sincosUseNative(CallInst *aCI, const MangledFuncInfo &FInfo);
 
   // evaluate calls if calls' arguments are constants.
-  bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
-    double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
-  bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
+  bool evaluateScalarMathFunc(MangledFuncInfo &FInfo, double &Res0,
+                              double &Res1, Constant *copr0, Constant *copr1,
+                              Constant *copr2);
+  bool evaluateCall(CallInst *aCI, MangledFuncInfo &FInfo);
 
   // exp
-  bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_exp(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // exp2
-  bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_exp2(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // exp10
-  bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_exp10(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // log
-  bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_log(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // log2
-  bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_log2(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // log10
-  bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_log10(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // sqrt
-  bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
+  bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const MangledFuncInfo &FInfo);
 
   // sin/cos
   bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
 
+  // __read_pipe/__write_pipe
+  bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
+                            UnmangledFuncInfo &FInfo);
+
   // Get insertion point at entry.
   BasicBlock::iterator getEntryIns(CallInst * UI);
   // Insert an Alloc instruction.
   AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
   // Get a scalar native builtin signle argument FP function
-  Constant* getNativeFunction(Module* M, const FuncInfo &FInfo);
+  Constant *getNativeFunction(Module *M, const MangledFuncInfo &FInfo);
+  // Fold library function with mangled name.
+  bool foldMangledFunction(CallInst *CI, MangledFuncInfo &Info, IRBuilder<> &B,
+                           AliasAnalysis *AA = nullptr);
+  // Fold library function with unmangled name.
+  bool foldUnmangledFunction(CallInst *CI, UnmangledFuncInfo &Info,
+                             IRBuilder<> &B, AliasAnalysis *AA = nullptr);
 
 protected:
   CallInst *CI;
@@ -457,25 +469,26 @@
   return TableRef();
 }
 
-static inline int getVecSize(const AMDGPULibFunc& FInfo) {
+static inline int getVecSize(const AMDGPUMangledLibFunc &FInfo) {
   return FInfo.Leads[0].VectorSize;
 }
 
-static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
+static inline AMDGPULibFunc::EType
+getArgType(const AMDGPUMangledLibFunc &FInfo) {
   return (AMDGPULibFunc::EType)FInfo.Leads[0].ArgType;
 }
 
-Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) {
+Constant *AMDGPULibCalls::getFunction(Module *M, const MangledFuncInfo &fInfo) {
   // If we are doing PreLinkOpt, the function is external. So it is safe to
   // use getOrInsertFunction() at this stage.
 
-  return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
-                       : AMDGPULibFunc::getFunction(M, fInfo);
+  return EnablePreLink ? AMDGPUMangledLibFunc::getOrInsertFunction(M, fInfo)
+                       : AMDGPUMangledLibFunc::getFunction(M, fInfo);
 }
 
-bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
-                                    FuncInfo *FInfo) {
-  return AMDGPULibFunc::parse(FMangledName, *FInfo);
+std::unique_ptr<AMDGPULibCalls::FuncInfo>
+AMDGPULibCalls::parseFunctionName(const StringRef &Name) {
+  return AMDGPULibFunc::parse(Name);
 }
 
 bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
@@ -498,7 +511,8 @@
                UseNative.begin()->empty());
 }
 
-bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
+bool AMDGPULibCalls::sincosUseNative(CallInst *aCI,
+                                     const MangledFuncInfo &FInfo) {
   bool native_sin = useNativeFunc("sin");
   bool native_cos = useNativeFunc("cos");
 
@@ -506,7 +520,7 @@
     Module *M = aCI->getModule();
     Value *opr0 = aCI->getArgOperand(0);
 
-    AMDGPULibFunc nf;
+    AMDGPUMangledLibFunc nf;
     nf.Leads[0].ArgType = FInfo.Leads[0].ArgType;
     nf.Leads[0].VectorSize = FInfo.Leads[0].VectorSize;
 
@@ -536,20 +550,23 @@
   CI = aCI;
   Function *Callee = aCI->getCalledFunction();
 
-  FuncInfo FInfo;
-  if (!parseFunctionName(Callee->getName(), &FInfo) ||
-      FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
-      getArgType(FInfo) == AMDGPULibFunc::F64 ||
-      !HasNative(FInfo.getId()) ||
-      !(AllNative || useNativeFunc(FInfo.getName())) ) {
+  auto PInfo = parseFunctionName(Callee->getName());
+  auto *FInfo = dyn_cast_or_null<MangledFuncInfo>(PInfo.get());
+
+  if (!FInfo)
+    return false;
+
+  if (FInfo->getPrefix() != AMDGPULibFunc::NOPFX ||
+      getArgType(*FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo->getId()) ||
+      !(AllNative || useNativeFunc(FInfo->getUnmangledName()))) {
     return false;
   }
 
-  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
-    return sincosUseNative(aCI, FInfo);
+  if (FInfo->getId() == AMDGPULibFunc::EI_SINCOS)
+    return sincosUseNative(aCI, *FInfo);
 
-  FInfo.setPrefix(AMDGPULibFunc::NATIVE);
-  Constant *F = getFunction(aCI->getModule(), FInfo);
+  FInfo->setPrefix(AMDGPULibFunc::NATIVE);
+  Constant *F = getFunction(aCI->getModule(), *FInfo);
   if (!F)
     return false;
 
@@ -559,6 +576,73 @@
   return true;
 }
 
+// Clang lowers the OpenCL read_pipe builtin to a call of __read_pipe_2 or
+// __read_pipe_4, where 2 or 4 is the original number of arguments, and appends
+// the packet type size and alignment as two extra arguments. The library has
+// optimized versions of these functions for the case where size and alignment
+// are the same power of 2. This function rewrites __read_pipe_2 into
+// __read_pipe_2_N in that case, where N is the type size in bytes (N = 1, 2, 4,
+// 8, ..., 128). The same applies to __read_pipe_4 and __write_pipe_2/4.
+bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
+                                          UnmangledFuncInfo &FInfo) {
+  auto *Callee = CI->getCalledFunction();
+  if (!Callee->isDeclaration())
+    return false;
+
+  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
+  auto *M = Callee->getParent();
+  auto &Ctx = M->getContext();
+  // The 2- or 4-argument builtin plus the appended size and alignment.
+  const unsigned NumArg = CI->getNumArgOperands();
+  if (NumArg != 4 && NumArg != 6)
+    return false;
+  // Both appended arguments must be constants to pick a specialization.
+  auto *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
+  auto *AlignC = dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
+  if (!SizeC || !AlignC)
+    return false;
+  const unsigned Size = SizeC->getZExtValue();
+  const unsigned Align = AlignC->getZExtValue();
+  if (Size != Align || !isPowerOf2_32(Size))
+    return false;
+
+  // Pointee type: one integer for up to 8 bytes, otherwise a vector of i64.
+  Type *PtrElemTy;
+  if (Size <= 8)
+    PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
+  else
+    PtrElemTy = VectorType::get(Type::getInt64Ty(Ctx), Size / 8);
+  const unsigned PtrArgLoc = NumArg - 3;
+  Value *PtrArg = CI->getArgOperand(PtrArgLoc);
+  const unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
+  auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
+
+  // Keep the leading arguments and retype the packet pointer; drop size/align.
+  SmallVector<llvm::Type *, 6> ArgTys;
+  SmallVector<Value *, 6> Args;
+  for (unsigned I = 0; I != PtrArgLoc; ++I) {
+    Value *Arg = CI->getArgOperand(I);
+    ArgTys.push_back(Arg->getType());
+    Args.push_back(Arg);
+  }
+  ArgTys.push_back(PtrTy);
+  auto *FTy = FunctionType::get(Callee->getReturnType(),
+                                ArrayRef<Type *>(ArgTys), false);
+  Args.push_back(B.CreatePointerCast(PtrArg, PtrTy));
+
+  // Get or insert the specialized function and call it instead.
+  FInfo.setName(Callee->getName().str() + "_" + std::to_string(Size));
+  FInfo.setFunctionType(FTy);
+  auto *F = AMDGPULibFunc::getOrInsertFunction(M, FInfo);
+  auto *NCI = B.CreateCall(F, Args);
+  NCI->setAttributes(CI->getAttributes());
+  CI->replaceAllUsesWith(NCI);
+  CI->dropAllReferences();
+  CI->eraseFromParent();
+
+  return true;
+}
+
 // This function returns false if no change; return true otherwise.
 bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
   this->CI = CI;
@@ -567,10 +651,11 @@
   // Ignore indirect calls.
   if (Callee == 0) return false;
 
-  FuncInfo FInfo;
-  if (!parseFunctionName(Callee->getName(), &FInfo))
+  auto PFInfo = parseFunctionName(Callee->getName());
+  if (!PFInfo)
     return false;
 
+  auto &FInfo = *PFInfo;
   // Further check the number of arguments to see if they match.
   if (CI->getNumArgOperands() != FInfo.getNumArgs())
     return false;
@@ -586,6 +671,15 @@
   if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
     B.setFastMathFlags(FPOp->getFastMathFlags());
 
+  if (auto *Mangled = dyn_cast<MangledFuncInfo>(&FInfo))
+    return foldMangledFunction(CI, *Mangled, B, AA);
+
+  auto *Unmangled = cast<UnmangledFuncInfo>(&FInfo);
+  return foldUnmangledFunction(CI, *Unmangled, B, AA);
+}
+
+bool AMDGPULibCalls::foldMangledFunction(CallInst *CI, MangledFuncInfo &FInfo,
+                                         IRBuilder<> &B, AliasAnalysis *AA) {
   if (TDOFold(CI, FInfo))
     return true;
 
@@ -636,6 +730,22 @@
       return fold_sincos(CI, B, AA);
 
     break;
+  default:
+    break;
+  }
+
+  return false;
+}
+
+bool AMDGPULibCalls::foldUnmangledFunction(CallInst *CI,
+                                           UnmangledFuncInfo &FInfo,
+                                           IRBuilder<> &B, AliasAnalysis *AA) {
+  switch (FInfo.getId()) {
+  case AMDGPULibFunc::EI_READ_PIPE_2:
+  case AMDGPULibFunc::EI_READ_PIPE_4:
+  case AMDGPULibFunc::EI_WRITE_PIPE_2:
+  case AMDGPULibFunc::EI_WRITE_PIPE_4:
+    return fold_read_write_pipe(CI, B, FInfo);
 
   default:
     break;
@@ -644,7 +754,7 @@
   return false;
 }
 
-bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
+bool AMDGPULibCalls::TDOFold(CallInst *CI, const MangledFuncInfo &FInfo) {
   // Table-Driven optimization
   const TableRef tr = getOptTable(FInfo.getId());
   if (tr.size==0)
@@ -710,14 +820,15 @@
   return false;
 }
 
-bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
+bool AMDGPULibCalls::replaceWithNative(CallInst *CI,
+                                       const MangledFuncInfo &FInfo) {
   Module *M = CI->getModule();
   if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
       FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
       !HasNative(FInfo.getId()))
     return false;
 
-  AMDGPULibFunc nf = FInfo;
+  AMDGPUMangledLibFunc nf = FInfo;
   nf.setPrefix(AMDGPULibFunc::NATIVE);
   if (Constant *FPExpr = getFunction(M, nf)) {
     DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
@@ -733,7 +844,7 @@
 
 //  [native_]half_recip(c) ==> 1.0/c
 bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
-                                const FuncInfo &FInfo) {
+                                const MangledFuncInfo &FInfo) {
   Value *opr0 = CI->getArgOperand(0);
   if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
     // Just create a normal div. Later, InstCombine will be able
@@ -752,7 +863,7 @@
 
 //  [native_]half_divide(x, c) ==> x/c
 bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
-                                 const FuncInfo &FInfo) {
+                                 const MangledFuncInfo &FInfo) {
   Value *opr0 = CI->getArgOperand(0);
   Value *opr1 = CI->getArgOperand(1);
   ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
@@ -782,7 +893,7 @@
 }
 
 bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
-                              const FuncInfo &FInfo) {
+                              const MangledFuncInfo &FInfo) {
   assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
           FInfo.getId() == AMDGPULibFunc::EI_POWR ||
           FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
@@ -861,11 +972,13 @@
   if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
     // pow[r](x, [-]0.5) = sqrt(x)
     bool issqrt = CF->isExactlyValue(0.5);
-    if (Constant *FPExpr = getFunction(M,
-        AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
-                             : AMDGPULibFunc::EI_RSQRT, FInfo))) {
+    if (Constant *FPExpr = getFunction(
+            M, AMDGPUMangledLibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
+                                           : AMDGPULibFunc::EI_RSQRT,
+                                    FInfo))) {
       DEBUG(errs() << "AMDIC: " << *CI << " ---> "
-                   << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
+                   << FInfo.getUnmangledName().c_str() << "(" << *opr0
+                   << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
                                                         : "__pow2rsqrt");
       replaceCall(nval);
@@ -929,8 +1042,8 @@
 
   // powr ---> exp2(y * log2(x))
   // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
-  Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2,
-                                                   FInfo));
+  Constant *ExpExpr =
+      getFunction(M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
   if (!ExpExpr)
     return false;
 
@@ -1016,8 +1129,8 @@
 
   Value *nval;
   if (needabs) {
-    Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS,
-                                                     FInfo));
+    Constant *AbsExpr =
+        getFunction(M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_FABS, FInfo));
     if (!AbsExpr)
       return false;
     nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
@@ -1025,8 +1138,8 @@
     nval = cnval ? cnval : opr0;
   }
   if (needlog) {
-    Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2,
-                                                     FInfo));
+    Constant *LogExpr =
+        getFunction(M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
     if (!LogExpr)
       return false;
     nval = CreateCallEx(B,LogExpr, nval, "__log2");
@@ -1067,7 +1180,7 @@
 }
 
 bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
-                                const FuncInfo &FInfo) {
+                                const MangledFuncInfo &FInfo) {
   Value *opr0 = CI->getArgOperand(0);
   Value *opr1 = CI->getArgOperand(1);
 
@@ -1086,8 +1199,8 @@
     std::vector<const Type*> ParamsTys;
     ParamsTys.push_back(opr0->getType());
     Module *M = CI->getModule();
-    if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
-                                                        FInfo))) {
+    if (Constant *FPExpr = getFunction(
+            M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
       DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
       replaceCall(nval);
@@ -1095,8 +1208,8 @@
     }
   } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
     Module *M = CI->getModule();
-    if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
-                                                        FInfo))) {
+    if (Constant *FPExpr = getFunction(
+            M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
       DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
       replaceCall(nval);
@@ -1113,8 +1226,8 @@
     std::vector<const Type*> ParamsTys;
     ParamsTys.push_back(opr0->getType());
     Module *M = CI->getModule();
-    if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
-                                                        FInfo))) {
+    if (Constant *FPExpr = getFunction(
+            M, AMDGPUMangledLibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
       DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
       replaceCall(nval);
@@ -1125,7 +1238,7 @@
 }
 
 bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
-                                  const FuncInfo &FInfo) {
+                                  const MangledFuncInfo &FInfo) {
   Value *opr0 = CI->getArgOperand(0);
   Value *opr1 = CI->getArgOperand(1);
   Value *opr2 = CI->getArgOperand(2);
@@ -1169,21 +1282,23 @@
 }
 
 // Get a scalar native builtin signle argument FP function
-Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
+Constant *AMDGPULibCalls::getNativeFunction(Module *M,
+                                            const MangledFuncInfo &FInfo) {
   if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
     return nullptr;
-  FuncInfo nf = FInfo;
+  MangledFuncInfo nf = FInfo;
   nf.setPrefix(AMDGPULibFunc::NATIVE);
   return getFunction(M, nf);
 }
 
 // fold sqrt -> native_sqrt (x)
 bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
-                               const FuncInfo &FInfo) {
+                               const MangledFuncInfo &FInfo) {
   if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
       (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
     if (Constant *FPExpr = getNativeFunction(
-        CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
+            CI->getModule(),
+            AMDGPUMangledLibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
       Value *opr0 = CI->getArgOperand(0);
       DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                    << "sqrt(" << *opr0 << ")\n");
@@ -1198,10 +1313,12 @@
 // fold sin, cos -> sincos.
 bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
                                  AliasAnalysis *AA) {
-  AMDGPULibFunc fInfo;
-  if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
+  auto Info = AMDGPULibFunc::parse(CI->getCalledFunction()->getName());
+  auto *pInfo = dyn_cast_or_null<AMDGPUMangledLibFunc>(Info.get());
+  if (!pInfo) // Parse failure, or the callee is not a mangled lib function.
     return false;
 
+  AMDGPUMangledLibFunc &fInfo = *pInfo;
   assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
          fInfo.getId() == AMDGPULibFunc::EI_COS);
   bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
@@ -1258,7 +1375,7 @@
 
   // for OpenCL 2.0 we have only generic implementation of sincos
   // function.
-  AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
+  AMDGPUMangledLibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
   nf.Leads[0].PtrKind = AMDGPULibFunc::GENERIC;
   Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
   if (!Fsincos) return false;
@@ -1320,8 +1437,8 @@
   return Alloc;
 }
 
-bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
-                                            double& Res0, double& Res1,
+bool AMDGPULibCalls::evaluateScalarMathFunc(MangledFuncInfo &FInfo,
+                                            double &Res0, double &Res1,
                                             Constant *copr0, Constant *copr1,
                                             Constant *copr2) {
   // By default, opr0/opr1/opr3 holds values of float/double type.
@@ -1515,7 +1632,7 @@
   return false;
 }
 
-bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
+bool AMDGPULibCalls::evaluateCall(CallInst *aCI, MangledFuncInfo &FInfo) {
   int numArgs = (int)aCI->getNumArgOperands();
   if (numArgs > 3)
     return false;
Index: lib/Target/AMDGPU/AMDGPULibFunc.h
===================================================================
--- lib/Target/AMDGPU/AMDGPULibFunc.h
+++ lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -26,6 +26,14 @@
     // IMPORTANT: enums below should go in ascending by 1 value order
     // because they are used as indexes in the mangling rules table.
     // don't use explicit value assignment.
+    //
+    // There are two kinds of library functions: those with mangled
+    // names and those with unmangled names. The enumerators for the
+    // mangled ones are all defined before the enumerators for the
+    // unmangled ones, and EI_LAST_MANGLED is an alias for the last
+    // mangled enumerator; AMDGPULibFunc::isMangled() relies on this.
+    //
+    // Library functions with mangled names.
     EI_ABS,
     EI_ABS_DIFF,
     EI_ACOS,
@@ -144,7 +152,6 @@
     EI_POWR,
     EI_PREFETCH,
     EI_RADIANS,
-    EI_READ_PIPE,
     EI_RECIP,
     EI_REMAINDER,
     EI_REMQUO,
@@ -212,7 +219,6 @@
     EI_WRITE_IMAGEF,
     EI_WRITE_IMAGEI,
     EI_WRITE_IMAGEUI,
-    EI_WRITE_PIPE,
     EI_NCOS,
     EI_NEXP2,
     EI_NFMA,
@@ -225,6 +231,14 @@
     EI_FLDEXP,
     EI_CLASS,
     EI_RCBRT,
+    EI_LAST_MANGLED =
+        EI_RCBRT, /* The last library function with mangled name */
+
+    // Library functions with unmangled name.
+    EI_READ_PIPE_2,
+    EI_READ_PIPE_4,
+    EI_WRITE_PIPE_2,
+    EI_WRITE_PIPE_4,
 
     EX_INTRINSICS_COUNT
   };
@@ -300,49 +314,85 @@
   };
 
 public:
-  static bool      parse(StringRef mangledName, AMDGPULibFunc &iInfo);
-
-  AMDGPULibFunc();
-  AMDGPULibFunc(EFuncId id, const AMDGPULibFunc& copyFrom);
+  static std::unique_ptr<AMDGPULibFunc> parse(StringRef mangledName);
 
-  ENamePrefix   getPrefix() const { return FKind; }
-  EFuncId  getId() const { return FuncId; }
+  explicit AMDGPULibFunc() {}
+  virtual ~AMDGPULibFunc() {}
 
-  std::string   getName() const;
-  unsigned      getNumArgs() const;
+  virtual unsigned getNumArgs() const = 0;
 
-  FunctionType* getFunctionType(Module& M) const;
+  EFuncId getId() const { return FuncId; }
 
-  std::string   mangle() const;
+  bool isMangled() const {
+    return static_cast<unsigned>(FuncId) <=
+           static_cast<unsigned>(EI_LAST_MANGLED);
+  }
 
-  void setPrefix(ENamePrefix pfx) { FKind = pfx; }
   void setId(EFuncId id) { FuncId = id; }
+  virtual bool parseFuncName(StringRef &mangledName) = 0;
 
-  static Function* getFunction(llvm::Module *M, const AMDGPULibFunc& fInfo);
+  /// \return The mangled function name for mangled library functions
+  /// and unmangled function name for unmangled library functions.
+  virtual std::string mangle() const = 0;
 
-  static Function* getOrInsertFunction(llvm::Module *M,
-                                       const AMDGPULibFunc& fInfo);
+  void setName(StringRef N) { Name = N; }
 
-  static StringRef getUnmangledName(const StringRef& mangledName);
+  virtual FunctionType *getFunctionType(Module &M) const = 0;
+  static Function *getFunction(llvm::Module *M, const AMDGPULibFunc &fInfo);
 
-  Param         Leads[2];
+  static Function *getOrInsertFunction(llvm::Module *M,
+                                       const AMDGPULibFunc &fInfo);
 
-private:
+protected:
   EFuncId       FuncId;
-  ENamePrefix   FKind;
-  std::string   Name;
+  std::string Name;
+};
 
-  void          reset();
+class AMDGPUMangledLibFunc : public AMDGPULibFunc {
+public:
+  Param Leads[2];
+
+  explicit AMDGPUMangledLibFunc();
+  explicit AMDGPUMangledLibFunc(EFuncId id,
+                                const AMDGPUMangledLibFunc &copyFrom);
+  unsigned getNumArgs() const override;
+  bool parseFuncName(StringRef &mangledName) override;
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const AMDGPULibFunc *F) { return F->isMangled(); }
 
-  std::string   mangleNameItanium() const;
-  bool          parseItanuimName(StringRef& mangledName);
+  std::string getUnmangledName() const;
+  FunctionType *getFunctionType(Module &M) const override;
+
+  ENamePrefix getPrefix() const { return FKind; }
+  void setPrefix(ENamePrefix pfx) { FKind = pfx; }
 
-  std::string   mangleName(const StringRef& name) const;
-  bool          parseName(const StringRef& mangledName);
+  static StringRef getUnmangledName(StringRef MangledName);
 
-  template <typename Stream>
-  void          writeName(Stream& OS) const;
+  std::string mangle() const override;
+
+private:
+  ENamePrefix FKind;
+
+  std::string mangleNameItanium() const;
+
+  std::string mangleName(StringRef Name) const;
+  bool parseUnmangledName(StringRef MangledName);
+
+  template <typename Stream> void writeName(Stream &OS) const;
 };
 
+// Library function with an unmangled name. mangle() returns the name as is.
+class AMDGPUUnmangledLibFunc : public AMDGPULibFunc {
+  FunctionType *FuncTy = nullptr; // Null until setFunctionType() is called.
+public:
+  explicit AMDGPUUnmangledLibFunc();
+  unsigned getNumArgs() const override;
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static bool classof(const AMDGPULibFunc *F) { return !F->isMangled(); }
+  bool parseFuncName(StringRef &Name) override;
+  std::string mangle() const override { return Name; }
+  void setFunctionType(FunctionType *FT) { FuncTy = FT; }
+  FunctionType *getFunctionType(Module &M) const override { return FuncTy; }
+};
 }
 #endif // _AMDGPU_LIBFUNC_H_
Index: lib/Target/AMDGPU/AMDGPULibFunc.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -65,6 +65,51 @@
    unsigned getNumArgs() const;
 };
 
+// Static information (name and argument count) about a library function with
+// an unmangled name, plus the lookup table of all such functions.
+class UnmangledFuncInfo {
+  StringRef const Name;
+  unsigned NumArgs = 0; // Keeps default-constructed objects well defined.
+  // Table for all lib functions with unmangled names.
+  static const UnmangledFuncInfo Table[];
+  // Number of entries in Table.
+  static const unsigned TableSize;
+
+  // Map function name to index in Table; filled from Table on construction.
+  class NameMap : public StringMap<unsigned> {
+  public:
+    NameMap() {
+      for (unsigned I = 0; I != TableSize; ++I)
+        (*this)[Table[I].Name] = I;
+    }
+  };
+  friend class NameMap;
+  static NameMap Map;
+
+public:
+  using ID = AMDGPULibFunc::EFuncId;
+  UnmangledFuncInfo() = default;
+  UnmangledFuncInfo(StringRef FuncName, unsigned ArgCount)
+      : Name(FuncName), NumArgs(ArgCount) {}
+  // Find the function id for Name (defined out of line).
+  static bool lookup(StringRef Name, ID &Id);
+  // Table entry I describes the function with id EI_LAST_MANGLED + 1 + I.
+  static unsigned toIndex(ID Id) {
+    assert(static_cast<unsigned>(Id) >
+               static_cast<unsigned>(AMDGPULibFunc::EI_LAST_MANGLED) &&
+           "Invalid unmangled library function");
+    return static_cast<unsigned>(Id) - 1 -
+           static_cast<unsigned>(AMDGPULibFunc::EI_LAST_MANGLED);
+  }
+  static ID toFuncId(unsigned Index) {
+    assert(Index < TableSize && "Invalid unmangled library function");
+    return static_cast<ID>(
+        Index + 1 + static_cast<unsigned>(AMDGPULibFunc::EI_LAST_MANGLED));
+  }
+  static unsigned getNumArgs(ID Id) { return Table[toIndex(Id)].NumArgs; }
+  static StringRef getName(ID Id) { return Table[toIndex(Id)].Name; }
+};
+
 unsigned ManglingRule::getNumArgs() const {
    unsigned I=0;
    while (I < (sizeof Param/sizeof Param[0]) && Param[I]) ++I;
@@ -215,7 +260,6 @@
 { "powr"                            , {1},   {E_ANY,E_COPY}},
 { "prefetch"                        , {1},   {E_CONSTPTR_ANY,EX_SIZET}},
 { "radians"                         , {1},   {E_ANY}},
-{ "read_pipe"                       , {4},   {E_COPY,EX_RESERVEDID,EX_UINT,E_ANY}},
 { "recip"                           , {1},   {E_ANY}},
 { "remainder"                       , {1},   {E_ANY,E_COPY}},
 { "remquo"                          , {1,3}, {E_ANY,E_COPY,E_ANY}},
@@ -283,7 +327,6 @@
 { "write_imagef"                    , {1},   {E_ANY,E_IMAGECOORDS,EX_FLOAT4}},
 { "write_imagei"                    , {1},   {E_ANY,E_IMAGECOORDS,EX_INTV4}},
 { "write_imageui"                   , {1},   {E_ANY,E_IMAGECOORDS,EX_UINTV4}},
-{ "write_pipe"                      , {4},   {E_COPY,EX_RESERVEDID,EX_UINT,E_ANY}},
 { "ncos"                            , {1},   {E_ANY} },
 { "nexp2"                           , {1},   {E_ANY} },
 { "nfma"                            , {1},   {E_ANY, E_COPY, E_COPY} },
@@ -298,6 +341,19 @@
 { "rcbrt"                           , {1},   {E_ANY} },
 };
 
+// Library functions with unmangled name.
+const UnmangledFuncInfo UnmangledFuncInfo::Table[] = {
+    {"__read_pipe_2", 4},
+    {"__read_pipe_4", 6},
+    {"__write_pipe_2", 4},
+    {"__write_pipe_4", 6},
+};
+
+const unsigned UnmangledFuncInfo::TableSize =
+    sizeof(UnmangledFuncInfo::Table) / sizeof(UnmangledFuncInfo::Table[0]);
+
+UnmangledFuncInfo::NameMap UnmangledFuncInfo::Map;
+
 static const struct ManglingRulesMap : public StringMap<int> {
   ManglingRulesMap()
     : StringMap<int>(sizeof(manglingRules)/sizeof(manglingRules[0])) {
@@ -461,18 +517,7 @@
 
 } // end anonymous namespace
 
-AMDGPULibFunc::AMDGPULibFunc() {
-  reset();
-}
-
-AMDGPULibFunc::AMDGPULibFunc(EFuncId id, const AMDGPULibFunc& copyFrom)
-  : FuncId(id) {
-  FKind = copyFrom.FKind;
-  Leads[0] = copyFrom.Leads[0];
-  Leads[1] = copyFrom.Leads[1];
-}
-
-void AMDGPULibFunc::reset() {
+AMDGPUMangledLibFunc::AMDGPUMangledLibFunc() {
   FuncId = EI_NONE;
   FKind = NOPFX;
   Leads[0].reset();
@@ -480,6 +525,16 @@
   Name.clear();
 }
 
+AMDGPUUnmangledLibFunc::AMDGPUUnmangledLibFunc() { FuncId = EI_NONE; }
+
+AMDGPUMangledLibFunc::AMDGPUMangledLibFunc(
+    EFuncId id, const AMDGPUMangledLibFunc &copyFrom) {
+  FuncId = id; // The id is the argument, not copyFrom's FuncId.
+  FKind = copyFrom.FKind;
+  Leads[0] = copyFrom.Leads[0];
+  Leads[1] = copyFrom.Leads[1]; // Name is not assigned from copyFrom here.
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // Demangling
 
@@ -508,8 +563,8 @@
   return Pfx;
 }
 
-bool AMDGPULibFunc::parseName(const StringRef& fullName) {
-  FuncId = static_cast<EFuncId>(manglingRulesMap.lookup(fullName));
+bool AMDGPUMangledLibFunc::parseUnmangledName(StringRef FullName) {
+  FuncId = static_cast<EFuncId>(manglingRulesMap.lookup(FullName));
   return FuncId != EI_NONE;
 }
 
@@ -601,10 +656,11 @@
   return true;
 }
 
-bool AMDGPULibFunc::parseItanuimName(StringRef& mangledName) {
+bool AMDGPUMangledLibFunc::parseFuncName(StringRef &mangledName) {
   StringRef Name = eatLengthPrefixedName(mangledName);
   FKind = parseNamePrefix(Name);
-  if (!parseName(Name)) return false;
+  if (!parseUnmangledName(Name))
+    return false;
 
   const ManglingRule& Rule = manglingRules[FuncId];
   ItaniumParamParser Parser;
@@ -619,30 +675,40 @@
   return true;
 }
 
-bool AMDGPULibFunc::parse(StringRef mangledName, AMDGPULibFunc& iInfo) {
-  iInfo.reset();
-  if (mangledName.empty())
+bool AMDGPUUnmangledLibFunc::parseFuncName(StringRef &Name) {
+  if (!UnmangledFuncInfo::lookup(Name, FuncId)) // lookup also sets FuncId.
     return false;
+  setName(Name); // Only recorded when the lookup succeeded.
+  return true;
+}
 
-  if (eatTerm(mangledName, "_Z")) {
-    return iInfo.parseItanuimName(mangledName);
-  }
-  return false;
+std::unique_ptr<AMDGPULibFunc> AMDGPULibFunc::parse(StringRef FuncName) {
+  if (FuncName.empty())
+    return std::unique_ptr<AMDGPULibFunc>(); // Null: nothing to parse.
+
+  std::unique_ptr<AMDGPULibFunc> LibF;
+  if (eatTerm(FuncName, "_Z")) // "_Z" selects the mangled-name parser.
+    LibF = make_unique<AMDGPUMangledLibFunc>();
+  else
+    LibF = make_unique<AMDGPUUnmangledLibFunc>();
+  if (LibF->parseFuncName(FuncName))
+    return LibF;
+
+  return std::unique_ptr<AMDGPULibFunc>(); // Null: parseFuncName failed.
 }
 
-StringRef AMDGPULibFunc::getUnmangledName(const StringRef& mangledName) {
+StringRef AMDGPUMangledLibFunc::getUnmangledName(StringRef mangledName) {
   StringRef S = mangledName;
   if (eatTerm(S, "_Z"))
     return eatLengthPrefixedName(S);
   return StringRef(); // No "_Z" prefix: return an empty StringRef.
 }
 
-
 ///////////////////////////////////////////////////////////////////////////////
 // Mangling
 
 template <typename Stream>
-void AMDGPULibFunc::writeName(Stream& OS) const {
+void AMDGPUMangledLibFunc::writeName(Stream &OS) const {
   const char *Pfx = "";
   switch (FKind) {
   case NATIVE: Pfx = "native_"; break;
@@ -658,9 +724,7 @@
   }
 }
 
-std::string AMDGPULibFunc::mangle() const {
-  return mangleNameItanium();
-}
+std::string AMDGPUMangledLibFunc::mangle() const { return mangleNameItanium(); }
 
 ///////////////////////////////////////////////////////////////////////////////
 // Itanium Mangling
@@ -788,7 +852,7 @@
 };
 } // namespace
 
-std::string AMDGPULibFunc::mangleNameItanium() const {
+std::string AMDGPUMangledLibFunc::mangleNameItanium() const {
   SmallString<128> Buf;
   raw_svector_ostream S(Buf);
   SmallString<128> NameBuf;
@@ -850,7 +914,7 @@
   return T;
 }
 
-FunctionType* AMDGPULibFunc::getFunctionType(Module& M) const {
+FunctionType *AMDGPUMangledLibFunc::getFunctionType(Module &M) const {
   LLVMContext& C = M.getContext();
   std::vector<Type*> Args;
   ParamIterator I(Leads, manglingRules[FuncId]);
@@ -863,18 +927,22 @@
     Args, false);
 }
 
-unsigned AMDGPULibFunc::getNumArgs() const {
+unsigned AMDGPUMangledLibFunc::getNumArgs() const {
   return manglingRules[FuncId].getNumArgs();
 }
 
-std::string AMDGPULibFunc::getName() const {
+unsigned AMDGPUUnmangledLibFunc::getNumArgs() const {
+  return UnmangledFuncInfo::getNumArgs(FuncId); // Table[toIndex(Id)].NumArgs
+}
+
+std::string AMDGPUMangledLibFunc::getUnmangledName() const {
   SmallString<128> Buf;
   raw_svector_ostream OS(Buf);
   writeName(OS); // Emit the name into the stream constructed over Buf.
   return OS.str();
 }
 
-Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc& fInfo) {
+Function *AMDGPULibFunc::getFunction(Module *M, const AMDGPULibFunc &fInfo) {
   std::string FuncName = fInfo.mangle();
   Function *F = dyn_cast_or_null<Function>(
     M->getValueSymbolTable().lookup(FuncName));
@@ -889,7 +957,7 @@
 }
 
 Function *AMDGPULibFunc::getOrInsertFunction(Module *M,
-                                             const AMDGPULibFunc& fInfo) {
+                                             const AMDGPULibFunc &fInfo) {
   std::string const FuncName = fInfo.mangle();
   Function *F = dyn_cast_or_null<Function>(
     M->getValueSymbolTable().lookup(FuncName));
@@ -929,3 +997,13 @@
 
   return cast<Function>(C);
 }
+
+bool UnmangledFuncInfo::lookup(StringRef Name, ID &Id) {
+  auto Loc = Map.find(Name); // Search the class-level name map.
+  if (Loc != Map.end()) {
+    Id = toFuncId(Loc->second); // Convert the stored value via toFuncId().
+    return true;
+  }
+  Id = AMDGPULibFunc::EI_NONE; // Miss: Id is still written, as EI_NONE.
+  return false;
+}
Index: test/CodeGen/AMDGPU/simplify-libcalls.ll
===================================================================
--- test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ test/CodeGen/AMDGPU/simplify-libcalls.ll
@@ -1,6 +1,6 @@
-; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
-; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-PRELINK %s
-; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink <%s | FileCheck -check-prefix=GCN -check-prefix=GCN-NATIVE %s
+; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall <%s | opt -instnamer -S | FileCheck -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
+; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | opt -instnamer -S | FileCheck -check-prefix=GCN -check-prefix=GCN-PRELINK %s
+; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink <%s | opt -instnamer -S | FileCheck -check-prefix=GCN -check-prefix=GCN-NATIVE %s
 
 ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
 ; GCN-POSTLINK: tail call fast float @_Z3sinf(
@@ -299,8 +299,8 @@
 ; GCN: %__powx2 = fmul fast float %tmp, %tmp
 ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp
-; GCN: %0 = fmul fast float %__powx21, %__powx21
-; GCN: %__powprod3 = fmul fast float %0, %__powx22
+; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
+; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
 define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) {
 entry:
   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
@@ -314,8 +314,8 @@
 ; GCN: %__powx2 = fmul fast float %tmp, %tmp
 ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp
-; GCN: %0 = fmul fast float %__powx21, %__powx21
-; GCN: %__powprod3 = fmul fast float %0, %__powx22
+; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
+; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
 define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) {
 entry:
   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
@@ -331,8 +331,8 @@
 ; GCN: %__powx2 = fmul fast float %tmp, %tmp
 ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
 ; GCN: %__powx22 = fmul fast float %__powx2, %tmp
-; GCN: %0 = fmul fast float %__powx21, %__powx21
-; GCN: %__powprod3 = fmul fast float %0, %__powx22
+; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
+; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
 define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) {
 entry:
   %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1
@@ -350,12 +350,12 @@
 ; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
 ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
 ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
-; GCN-PRELINK: %0 = bitcast float %tmp to i32
-; GCN-PRELINK: %__pow_sign = and i32 %0, -2147483648
-; GCN-PRELINK: %1 = bitcast float %__exp2 to i32
-; GCN-PRELINK: %2 = or i32 %__pow_sign, %1
-; GCN-PRELINK: %3 = bitcast float addrspace(1)* %a to i32 addrspace(1)*
-; GCN-PRELINK: store i32 %2, i32 addrspace(1)* %3, align 4
+; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
+; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
+; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
+; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
+; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
 define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) {
 entry:
   %tmp = load float, float addrspace(1)* %a, align 4
@@ -393,12 +393,12 @@
 ; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
 ; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
 ; GCN-PRELINK: %__yeven = shl i32 %conv, 31
-; GCN-PRELINK: %0 = bitcast float %tmp to i32
-; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %0
-; GCN-PRELINK: %1 = bitcast float %__exp2 to i32
-; GCN-PRELINK: %2 = or i32 %__pow_sign, %1
-; GCN-PRELINK: %3 = bitcast float addrspace(1)* %a to i32 addrspace(1)*
-; GCN-PRELINK: store i32 %2, i32 addrspace(1)* %3, align 4
+; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
+; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
+; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
+; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
+; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)*
+; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4
 define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) {
 entry:
   %tmp = load float, float addrspace(1)* %a, align 4
@@ -692,3 +692,96 @@
 }
 
 declare float @_Z6sincosfPU3AS4f(float, float addrspace(4)*)
+
+%opencl.pipe_t = type opaque
+%opencl.reserve_id_t = type opaque
+
+; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
+; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND:[0-9]+]]
+; GCN-PRELINK: call i32 @__read_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 2, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+define amdgpu_kernel void @test_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
+entry:
+  %0 = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
+  %1 = addrspacecast i8 addrspace(1)* %0 to i8 addrspace(4)*
+  %2 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8 addrspace(4)* %1, i32 4, i32 4) #0
+  %3 = tail call %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4)
+  %4 = tail call i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %3, i32 2, i8 addrspace(4)* %1, i32 4, i32 4) #0
+  tail call void @__commit_read_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %3, i32 4, i32 4)
+  ret void
+}
+
+declare i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)*, i8 addrspace(4)*, i32, i32)
+
+declare %opencl.reserve_id_t* @__reserve_read_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32)
+
+declare i32 @__read_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i8 addrspace(4)*, i32, i32)
+
+declare void @__commit_read_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i32)
+
+; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr)
+; GCN-PRELINK: call i32 @__write_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+; GCN-PRELINK: call i32 @__write_pipe_4_4(%opencl.pipe_t addrspace(1)* %{{.*}}, %opencl.reserve_id_t* %{{.*}}, i32 2, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+define amdgpu_kernel void @test_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 addrspace(1)* %ptr) local_unnamed_addr {
+entry:
+  %0 = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
+  %1 = addrspacecast i8 addrspace(1)* %0 to i8 addrspace(4)*
+  %2 = tail call i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)* %p, i8 addrspace(4)* %1, i32 4, i32 4) #0
+  %3 = tail call %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)* %p, i32 2, i32 4, i32 4) #0
+  %4 = tail call i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %3, i32 2, i8 addrspace(4)* %1, i32 4, i32 4) #0
+  tail call void @__commit_write_pipe(%opencl.pipe_t addrspace(1)* %p, %opencl.reserve_id_t* %3, i32 4, i32 4) #0
+  ret void
+}
+
+declare i32 @__write_pipe_2(%opencl.pipe_t addrspace(1)*, i8 addrspace(4)*, i32, i32) local_unnamed_addr
+
+declare %opencl.reserve_id_t* @__reserve_write_pipe(%opencl.pipe_t addrspace(1)*, i32, i32, i32) local_unnamed_addr
+
+declare i32 @__write_pipe_4(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i8 addrspace(4)*, i32, i32) local_unnamed_addr
+
+declare void @__commit_write_pipe(%opencl.pipe_t addrspace(1)*, %opencl.reserve_id_t*, i32, i32) local_unnamed_addr
+
+%struct.S = type { [100 x i32] }
+
+; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
+; GCN-PRELINK: call i32 @__read_pipe_2_1(%opencl.pipe_t addrspace(1)* %{{.*}}, i8 addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+; GCN-PRELINK: call i32 @__read_pipe_2_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i16 addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+; GCN-PRELINK: call i32 @__read_pipe_2_4(%opencl.pipe_t addrspace(1)* %{{.*}}, i32 addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+; GCN-PRELINK: call i32 @__read_pipe_2_8(%opencl.pipe_t addrspace(1)* %{{.*}}, i64 addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+; GCN-PRELINK: call i32 @__read_pipe_2_16(%opencl.pipe_t addrspace(1)* %{{.*}}, <2 x i64> addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+; GCN-PRELINK: call i32 @__read_pipe_2_32(%opencl.pipe_t addrspace(1)* %{{.*}}, <4 x i64> addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+; GCN-PRELINK: call i32 @__read_pipe_2_64(%opencl.pipe_t addrspace(1)* %{{.*}}, <8 x i64> addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+; GCN-PRELINK: call i32 @__read_pipe_2_128(%opencl.pipe_t addrspace(1)* %{{.*}}, <16 x i64> addrspace(4)* %{{.*}}) #[[NOUNWIND]]
+; GCN-PRELINK: call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %{{.*}}, i8 addrspace(4)* %{{.*}}, i32 400, i32 4) #[[NOUNWIND]]
+define amdgpu_kernel void @test_pipe_size(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(1)* %ptr1, %opencl.pipe_t addrspace(1)* %p2, i16 addrspace(1)* %ptr2, %opencl.pipe_t addrspace(1)* %p4, i32 addrspace(1)* %ptr4, %opencl.pipe_t addrspace(1)* %p8, i64 addrspace(1)* %ptr8, %opencl.pipe_t addrspace(1)* %p16, <2 x i64> addrspace(1)* %ptr16, %opencl.pipe_t addrspace(1)* %p32, <4 x i64> addrspace(1)* %ptr32, %opencl.pipe_t addrspace(1)* %p64, <8 x i64> addrspace(1)* %ptr64, %opencl.pipe_t addrspace(1)* %p128, <16 x i64> addrspace(1)* %ptr128, %opencl.pipe_t addrspace(1)* %pu, %struct.S addrspace(1)* %ptru) local_unnamed_addr #0 {
+entry:
+  %0 = addrspacecast i8 addrspace(1)* %ptr1 to i8 addrspace(4)*
+  %1 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p1, i8 addrspace(4)* %0, i32 1, i32 1) #0
+  %2 = bitcast i16 addrspace(1)* %ptr2 to i8 addrspace(1)*
+  %3 = addrspacecast i8 addrspace(1)* %2 to i8 addrspace(4)*
+  %4 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p2, i8 addrspace(4)* %3, i32 2, i32 2) #0
+  %5 = bitcast i32 addrspace(1)* %ptr4 to i8 addrspace(1)*
+  %6 = addrspacecast i8 addrspace(1)* %5 to i8 addrspace(4)*
+  %7 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p4, i8 addrspace(4)* %6, i32 4, i32 4) #0
+  %8 = bitcast i64 addrspace(1)* %ptr8 to i8 addrspace(1)*
+  %9 = addrspacecast i8 addrspace(1)* %8 to i8 addrspace(4)*
+  %10 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p8, i8 addrspace(4)* %9, i32 8, i32 8) #0
+  %11 = bitcast <2 x i64> addrspace(1)* %ptr16 to i8 addrspace(1)*
+  %12 = addrspacecast i8 addrspace(1)* %11 to i8 addrspace(4)*
+  %13 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p16, i8 addrspace(4)* %12, i32 16, i32 16) #0
+  %14 = bitcast <4 x i64> addrspace(1)* %ptr32 to i8 addrspace(1)*
+  %15 = addrspacecast i8 addrspace(1)* %14 to i8 addrspace(4)*
+  %16 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p32, i8 addrspace(4)* %15, i32 32, i32 32) #0
+  %17 = bitcast <8 x i64> addrspace(1)* %ptr64 to i8 addrspace(1)*
+  %18 = addrspacecast i8 addrspace(1)* %17 to i8 addrspace(4)*
+  %19 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p64, i8 addrspace(4)* %18, i32 64, i32 64) #0
+  %20 = bitcast <16 x i64> addrspace(1)* %ptr128 to i8 addrspace(1)*
+  %21 = addrspacecast i8 addrspace(1)* %20 to i8 addrspace(4)*
+  %22 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %p128, i8 addrspace(4)* %21, i32 128, i32 128) #0
+  %23 = bitcast %struct.S addrspace(1)* %ptru to i8 addrspace(1)*
+  %24 = addrspacecast i8 addrspace(1)* %23 to i8 addrspace(4)*
+  %25 = tail call i32 @__read_pipe_2(%opencl.pipe_t addrspace(1)* %pu, i8 addrspace(4)* %24, i32 400, i32 4) #0
+  ret void
+}
+
+; GCN-PRELINK: attributes #[[NOUNWIND]] = { nounwind }
+attributes #0 = { nounwind }