diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -1,37 +1,52 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
 
 #include "Inputs/cuda.h"
 
 // Coerced struct from `struct S` without all generic pointers lowered into
 // global ones.
-// CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
-// CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] }
+// COMMON: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
+// COMMON: %struct.T.coerce = type { [2 x float addrspace(1)*] }
 
 // On the host-side compilation, generic pointer won't be coerced.
 // HOST-NOT: %struct.S.coerce
 // HOST-NOT: %struct.T.coerce
 
-// CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
 // HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
+// CHECK:     = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
+// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
+// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
+// OPT: ret void
 __global__ void kernel1(int *x) {
   x[0]++;
 }
 
-// CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* nonnull align 4 dereferenceable(4) %x.coerce)
 // HOST: define void @_Z22__device_stub__kernel2Ri(i32* nonnull align 4 dereferenceable(4) %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
+// CHECK:     = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
+// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
+// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
+// OPT: ret void
 __global__ void kernel2(int &x) {
   x++;
 }
 
-// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
 // HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+// CHECK-LABEL: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
 __global__ void kernel3(__attribute__((address_space(2))) int *x,
                         __attribute__((address_space(1))) int *y) {
   y[0] = x[0];
 }
 
-// CHECK: define void @_Z4funcPi(i32* %x)
+// COMMON-LABEL: define void @_Z4funcPi(i32*{{.*}} %x)
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
 __device__ void func(int *x) {
   x[0]++;
 }
@@ -42,16 +57,25 @@
 };
 // `by-val` struct will be coerced into a similar struct with all generic
 // pointers lowerd into global ones.
-// CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
 // HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
+// OPT: [[P0:%.*]] = extractvalue %struct.S.coerce %s.coerce, 0
+// OPT: [[P1:%.*]] = extractvalue %struct.S.coerce %s.coerce, 1
+// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[P0]], align 4
+// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
+// OPT: store i32 [[INC]], i32 addrspace(1)* [[P0]], align 4
+// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
+// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
+// OPT: store float [[ADD]], float addrspace(1)* [[P1]], align 4
+// OPT: ret void
 __global__ void kernel4(struct S s) {
   s.x[0]++;
   s.y[0] += 1.f;
 }
 
 // If a pointer to struct is passed, only the pointer itself is coerced into the global one.
-// CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce)
 // HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)*{{.*}} %s.coerce)
 __global__ void kernel5(struct S *s) {
   s->x[0]++;
   s->y[0] += 1.f;
@@ -61,16 +85,26 @@
   float *x[2];
 };
 // `by-val` array is also coerced.
-// CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
 // HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
+// OPT: [[ARR:%.*]] = extractvalue %struct.T.coerce %t.coerce, 0
+// OPT: [[P0:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 0
+// OPT: [[P1:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 1
+// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[P0]], align 4
+// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
+// OPT: store float [[ADD0]], float addrspace(1)* [[P0]], align 4
+// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
+// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
+// OPT: store float [[ADD1]], float addrspace(1)* [[P1]], align 4
+// OPT: ret void
 __global__ void kernel6(struct T t) {
   t.x[0][0] += 1.f;
   t.x[1][0] += 2.f;
 }
 
 // Check that coerced pointers retain the noalias attribute when qualified with __restrict.
-// CHECK: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias %x.coerce)
 // HOST: define void @_Z22__device_stub__kernel7Pi(i32* noalias %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias{{.*}} %x.coerce)
 __global__ void kernel7(int *__restrict x) {
   x[0]++;
 }
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -96,7 +96,6 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -116,11 +115,13 @@
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <cassert>
 #include <iterator>
@@ -132,6 +133,11 @@
 
 using namespace llvm;
 
+static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
+    "assume-default-is-flat-addrspace", cl::init(false), cl::ReallyHidden,
+    cl::desc("The default address space is assumed as the flat address space. "
+             "This is mainly for test purpose."));
+
 static const unsigned UninitializedAddressSpace =
     std::numeric_limits<unsigned>::max();
 
@@ -143,6 +149,7 @@
 /// InferAddressSpaces
 class InferAddressSpaces : public FunctionPass {
   const TargetTransformInfo *TTI = nullptr;
+  const DataLayout *DL = nullptr;
 
   /// Target specific address space which uses of should be replaced if
   /// possible.
@@ -219,10 +226,45 @@
 INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
                 false, false)
 
+// Check whether that's no-op pointer bicast using a pair of
+// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
+// different address spaces.
+static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
+                                 const TargetTransformInfo *TTI) {
+  assert(I2P->getOpcode() == Instruction::IntToPtr);
+  auto *P2I = dyn_cast<Operator>(I2P->getOperand(0));
+  if (!P2I || P2I->getOpcode() != Instruction::PtrToInt)
+    return false;
+  // Check it's really safe to treat that pair of `ptrtoint`/`inttoptr` is a
+  // no-op cast. Besides checking both of them are no-op casts, as the
+  // reinterpreted pointer may be used in other pointer arithemetic, we also
+  // need to double-check that through the target-specific hook to ensure the
+  // underlying target also agrees that's a no-op address space cast and bit
+  // values are the same.
+  // The current IR spec doesn't have clear rules on address space casts,
+  // especially a clear definition for pointer bits in non-0 address spaces. It
+  // would be an undefined behavior if that pointer bits are accessed after an
+  // invalid reinterpret cast. Also, due to the unclearness for the meaning of
+  // bits in non-0 address spaces in the current spec, the pointer arithmetic
+  // may also be undefined after invalid pointer reinterpret cast. However, as
+  // we confirm through the target hooks that it's a no-op addrspacecast, it
+  // doens't matter since the bits should be the same.
+  return CastInst::isNoopCast(Instruction::CastOps(I2P->getOpcode()),
+                              I2P->getOperand(0)->getType(), I2P->getType(),
+                              DL) &&
+         CastInst::isNoopCast(Instruction::CastOps(P2I->getOpcode()),
+                              P2I->getOperand(0)->getType(), P2I->getType(),
+                              DL) &&
+         TTI->isNoopAddrSpaceCast(
+             P2I->getOperand(0)->getType()->getPointerAddressSpace(),
+             I2P->getType()->getPointerAddressSpace());
+}
+
 // Returns true if V is an address expression.
 // TODO: Currently, we consider only phi, bitcast, addrspacecast, and
 // getelementptr operators.
-static bool isAddressExpression(const Value &V) {
+static bool isAddressExpression(const Value &V, const DataLayout &DL,
+                                const TargetTransformInfo *TTI) {
   const Operator *Op = dyn_cast<Operator>(&V);
   if (!Op)
     return false;
@@ -241,6 +283,8 @@
     const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
     return II && II->getIntrinsicID() == Intrinsic::ptrmask;
   }
+  case Instruction::IntToPtr:
+    return isNoopPtrIntCastPair(Op, DL, TTI);
   default:
     return false;
   }
@@ -249,7 +293,9 @@
 // Returns the pointer operands of V.
 //
 // Precondition: V is an address expression.
-static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
+static SmallVector<Value *, 2>
+getPointerOperands(const Value &V, const DataLayout &DL,
+                   const TargetTransformInfo *TTI) {
   const Operator &Op = cast<Operator>(V);
   switch (Op.getOpcode()) {
   case Instruction::PHI: {
@@ -269,6 +315,11 @@
            "unexpected intrinsic call");
     return {II.getArgOperand(0)};
   }
+  case Instruction::IntToPtr: {
+    assert(isNoopPtrIntCastPair(&Op, DL, TTI));
+    auto *P2I = cast<Operator>(Op.getOperand(0));
+    return {P2I->getOperand(0)};
+  }
   default:
     llvm_unreachable("Unexpected instruction type.");
   }
@@ -337,13 +388,13 @@
   // expressions.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     // TODO: Look in non-address parts, like icmp operands.
-    if (isAddressExpression(*CE) && Visited.insert(CE).second)
+    if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
       PostorderStack.emplace_back(CE, false);
 
     return;
   }
 
-  if (isAddressExpression(*V) &&
+  if (isAddressExpression(*V, *DL, TTI) &&
       V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
     if (Visited.insert(V).second) {
       PostorderStack.emplace_back(V, false);
@@ -351,7 +402,7 @@
       Operator *Op = cast<Operator>(V);
       for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
         if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
-          if (isAddressExpression(*CE) && Visited.insert(CE).second)
+          if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
             PostorderStack.emplace_back(CE, false);
         }
       }
@@ -407,6 +458,10 @@
     } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
       if (!ASC->getType()->isVectorTy())
         PushPtrOperand(ASC->getPointerOperand());
+    } else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
+      if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
+        PushPtrOperand(
+            cast<PtrToIntInst>(I2P->getOperand(0))->getPointerOperand());
     }
   }
 
@@ -423,7 +478,7 @@
     }
     // Otherwise, adds its operands to the stack and explores them.
     PostorderStack.back().setInt(true);
-    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
+    for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
       appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                    Visited);
     }
@@ -536,6 +591,14 @@
     assert(I->getType()->isPointerTy());
     return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
                               NewPointerOperands[2], "", nullptr, I);
+  case Instruction::IntToPtr: {
+    assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
+    Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
+    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+    if (Src->getType() != NewPtrType)
+      return new BitCastInst(Src, NewPtrType);
+    return Src;
+  }
   default:
     llvm_unreachable("Unexpected opcode");
   }
@@ -545,8 +608,9 @@
 // constant expression `CE` with its operands replaced as specified in
 // ValueWithNewAddrSpace.
 static Value *cloneConstantExprWithNewAddressSpace(
-  ConstantExpr *CE, unsigned NewAddrSpace,
-  const ValueToValueMapTy &ValueWithNewAddrSpace) {
+    ConstantExpr *CE, unsigned NewAddrSpace,
+    const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
+    const TargetTransformInfo *TTI) {
   Type *TargetType =
     CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
 
@@ -577,6 +641,13 @@
     }
   }
 
+  if (CE->getOpcode() == Instruction::IntToPtr) {
+    assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI));
+    Constant *Src = cast<ConstantExpr>(CE->getOperand(0))->getOperand(0);
+    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+    return ConstantExpr::getBitCast(Src, TargetType);
+  }
+
   // Computes the operands of the new constant expression.
   bool IsNew = false;
   SmallVector<Constant *, 4> NewOperands;
@@ -594,7 +665,7 @@
     }
     if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
       if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
-              CExpr, NewAddrSpace, ValueWithNewAddrSpace)) {
+              CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
         IsNew = true;
         NewOperands.push_back(cast<Constant>(NewOperand));
         continue;
@@ -629,7 +700,7 @@
   const ValueToValueMapTy &ValueWithNewAddrSpace,
   SmallVectorImpl<const Use *> *UndefUsesToFix) const {
   // All values in Postorder are flat address expressions.
-  assert(isAddressExpression(*V) &&
+  assert(isAddressExpression(*V, *DL, TTI) &&
          V->getType()->getPointerAddressSpace() == FlatAddrSpace);
 
   if (Instruction *I = dyn_cast<Instruction>(V)) {
@@ -645,7 +716,7 @@
   }
 
   return cloneConstantExprWithNewAddressSpace(
-    cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
+      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI);
 }
 
 // Defines the join operation on the address space lattice (see the file header
@@ -669,6 +740,10 @@
     return false;
 
   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  DL = &F.getParent()->getDataLayout();
+
+  if (AssumeDefaultIsFlatAddressSpace)
+    FlatAddrSpace = 0;
 
   if (FlatAddrSpace == UninitializedAddressSpace) {
     FlatAddrSpace = TTI->getFlatAddressSpace();
@@ -773,7 +848,7 @@
     else
       NewAS = joinAddressSpaces(Src0AS, Src1AS);
   } else {
-    for (Value *PtrOperand : getPointerOperands(V)) {
+    for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
       auto I = InferredAddrSpace.find(PtrOperand);
       unsigned OperandAS = I != InferredAddrSpace.end() ?
         I->second : PtrOperand->getType()->getPointerAddressSpace();
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll
@@ -0,0 +1,101 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -infer-address-spaces %s | FileCheck -check-prefixes=COMMON,AMDGCN %s
+; RUN: opt -S -o - -infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck -check-prefixes=COMMON,NOTTI %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+
+; COMMON-LABEL: @noop_ptrint_pair(
+; AMDGCN-NEXT: store i32 0, i32 addrspace(1)* %{{.*}}
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
+; NOTTI-NEXT: %2 = inttoptr i64 %1 to i32*
+; NOTTI-NEXT: store i32 0, i32* %2
+; NOTTI-NEXT: ret void
+define void @noop_ptrint_pair(i32 addrspace(1)* %x.coerce) {
+  %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair(
+; AMDGCN-NEXT: ptrtoint i32 addrspace(3)* %{{.*}} to i64
+; AMDGCN-NEXT: inttoptr i64 %{{.*}} to i32*
+; AMDGCN-NEXT: store i32 0, i32* %{{.*}}
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: ptrtoint i32 addrspace(3)* %{{.*}} to i64
+; NOTTI-NEXT: inttoptr i64 %{{.*}} to i32*
+; NOTTI-NEXT: store i32 0, i32* %{{.*}}
+; NOTTI-NEXT: ret void
+define void @non_noop_ptrint_pair(i32 addrspace(3)* %x.coerce) {
+  %1 = ptrtoint i32 addrspace(3)* %x.coerce to i64
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair2(
+; AMDGCN-NEXT: ptrtoint i32 addrspace(1)* %{{.*}} to i32
+; AMDGCN-NEXT: inttoptr i32 %{{.*}} to i32*
+; AMDGCN-NEXT: store i32 0, i32* %{{.*}}
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: ptrtoint i32 addrspace(1)* %{{.*}} to i32
+; NOTTI-NEXT: inttoptr i32 %{{.*}} to i32*
+; NOTTI-NEXT: store i32 0, i32* %{{.*}}
+; NOTTI-NEXT: ret void
+define void @non_noop_ptrint_pair2(i32 addrspace(1)* %x.coerce) {
+  %1 = ptrtoint i32 addrspace(1)* %x.coerce to i32
+  %2 = inttoptr i32 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+@g = addrspace(1) global i32 0, align 4
+@l = addrspace(3) global i32 0, align 4
+
+; COMMON-LABEL: @noop_ptrint_pair_ce(
+; AMDGCN-NEXT: store i32 0, i32 addrspace(1)* @g
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+; NOTTI-NEXT: ret void
+define void @noop_ptrint_pair_ce() {
+  store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+  ret void
+}
+
+; COMMON-LABEL: @noop_ptrint_pair_ce2(
+; AMDGCN-NEXT: ret i32* addrspacecast (i32 addrspace(1)* @g to i32*)
+; NOTTI-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+define i32* @noop_ptrint_pair_ce2() {
+  ret i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair_ce(
+; AMDGCN-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+; NOTTI-NEXT: ret void
+define void @non_noop_ptrint_pair_ce() {
+  store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+  ret void
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair_ce2(
+; AMDGCN-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+; NOTTI-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+define i32* @non_noop_ptrint_pair_ce2() {
+  ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair_ce3(
+; AMDGCN-NEXT: ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*)
+; NOTTI-NEXT: ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*)
+define i32* @non_noop_ptrint_pair_ce3() {
+  ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*)
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair_ce4(
+; AMDGCN-NEXT: ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*)
+; NOTTI-NEXT: ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*)
+define i32* @non_noop_ptrint_pair_ce4() {
+  ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*)
+}