diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
--- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -1,37 +1,52 @@
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
 #include "Inputs/cuda.h"
 // Coerced struct from `struct S` without all generic pointers lowered into
 // global ones.
-// CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
-// CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] }
+// COMMON: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
+// COMMON: %struct.T.coerce = type { [2 x float addrspace(1)*] }
 // On the host-side compilation, generic pointer won't be coerced.
 // HOST-NOT: %struct.S.coerce
 // HOST-NOT: %struct.T.coerce
-// CHECK: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
 // HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
+// CHECK: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
+// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
+// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
+// OPT: ret void
 __global__ void kernel1(int *x) {
   x[0]++;
 }
-// CHECK: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)* nonnull align 4 dereferenceable(4) %x.coerce)
 // HOST: define void @_Z22__device_stub__kernel2Ri(i32* nonnull align 4 dereferenceable(4) %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
+// CHECK: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
+// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
+// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
+// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
+// OPT: ret void
 __global__ void kernel2(int &x) {
   x++;
 }
-// CHECK: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
 // HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+// CHECK-LABEL: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
 __global__ void kernel3(__attribute__((address_space(2))) int *x,
                         __attribute__((address_space(1))) int *y) {
   y[0] = x[0];
 }
-// CHECK: define void @_Z4funcPi(i32* %x)
+// COMMON-LABEL: define void @_Z4funcPi(i32*{{.*}} %x)
+// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
 __device__ void func(int *x) {
   x[0]++;
 }
@@ -42,16 +57,25 @@
 };
 // `by-val` struct will be coerced into a similar struct with all generic
 // pointers lowered into global ones.
-// CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
 // HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
+// OPT: [[P0:%.*]] = extractvalue %struct.S.coerce %s.coerce, 0
+// OPT: [[P1:%.*]] = extractvalue %struct.S.coerce %s.coerce, 1
+// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[P0]], align 4
+// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
+// OPT: store i32 [[INC]], i32 addrspace(1)* [[P0]], align 4
+// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
+// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
+// OPT: store float [[ADD]], float addrspace(1)* [[P1]], align 4
+// OPT: ret void
 __global__ void kernel4(struct S s) {
   s.x[0]++;
   s.y[0] += 1.f;
 }
 // If a pointer to struct is passed, only the pointer itself is coerced into the global one.
-// CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce)
 // HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)*{{.*}} %s.coerce)
 __global__ void kernel5(struct S *s) {
   s->x[0]++;
   s->y[0] += 1.f;
 }
@@ -61,16 +85,26 @@
   float *x[2];
 };
 // `by-val` array is also coerced.
-// CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
 // HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
+// OPT: [[ARR:%.*]] = extractvalue %struct.T.coerce %t.coerce, 0
+// OPT: [[P0:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 0
+// OPT: [[P1:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 1
+// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[P0]], align 4
+// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
+// OPT: store float [[ADD0]], float addrspace(1)* [[P0]], align 4
+// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
+// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
+// OPT: store float [[ADD1]], float addrspace(1)* [[P1]], align 4
+// OPT: ret void
 __global__ void kernel6(struct T t) {
   t.x[0][0] += 1.f;
   t.x[1][0] += 2.f;
 }
 // Check that coerced pointers retain the noalias attribute when qualified with __restrict.
-// CHECK: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias %x.coerce)
 // HOST: define void @_Z22__device_stub__kernel7Pi(i32* noalias %x)
+// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias{{.*}} %x.coerce)
 __global__ void kernel7(int *__restrict x) {
   x[0]++;
 }
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -96,7 +96,6 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -116,11 +115,13 @@
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <cassert>
 #include <iterator>
@@ -132,6 +133,11 @@
 using namespace llvm;
 
+static cl::opt<bool> AssumeDefaultIsFlatAddressSpace(
+    "assume-default-is-flat-addrspace", cl::init(false), cl::ReallyHidden,
+    cl::desc("The default address space is assumed as the flat address space. "
+             "This is mainly for test purpose."));
+
 static const unsigned UninitializedAddressSpace =
     std::numeric_limits<unsigned>::max();
 
@@ -143,6 +149,7 @@
 /// InferAddressSpaces
 class InferAddressSpaces : public FunctionPass {
   const TargetTransformInfo *TTI = nullptr;
+  const DataLayout *DL = nullptr;
 
   /// Target specific address space which uses of should be replaced if
   /// possible.
@@ -219,10 +226,45 @@
 INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces", false, false)
 
+// Check whether that's a no-op pointer bitcast using a pair of
+// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
+// different address spaces.
+static bool isNoopPtrIntCastPair(const Operator *I2P, const DataLayout &DL,
+                                 const TargetTransformInfo *TTI) {
+  assert(I2P->getOpcode() == Instruction::IntToPtr);
+  auto *P2I = dyn_cast<Operator>(I2P->getOperand(0));
+  if (!P2I || P2I->getOpcode() != Instruction::PtrToInt)
+    return false;
+  // Check it's really safe to treat that pair of `ptrtoint`/`inttoptr` as a
+  // no-op cast. Besides checking both of them are no-op casts, as the
+  // reinterpreted pointer may be used in other pointer arithmetic, we also
+  // need to double-check that through the target-specific hook. That ensures
+  // the underlying target also agrees that's a no-op address space cast and
+  // pointer bits are preserved.
+  // The current IR spec doesn't have clear rules on address space casts,
+  // especially a clear definition for pointer bits in non-default address
+  // spaces. It would be undefined if that pointer is dereferenced after an
+  // invalid reinterpret cast. Also, due to the unclearness for the meaning of
+  // bits in non-default address spaces in the current spec, the pointer
+  // arithmetic may also be undefined after invalid pointer reinterpret cast.
+  // However, as we confirm through the target hooks that it's a no-op
+  // addrspacecast, it doesn't matter since the bits should be the same.
+  return CastInst::isNoopCast(Instruction::CastOps(I2P->getOpcode()),
+                              I2P->getOperand(0)->getType(), I2P->getType(),
+                              DL) &&
+         CastInst::isNoopCast(Instruction::CastOps(P2I->getOpcode()),
+                              P2I->getOperand(0)->getType(), P2I->getType(),
+                              DL) &&
+         TTI->isNoopAddrSpaceCast(
+             P2I->getOperand(0)->getType()->getPointerAddressSpace(),
+             I2P->getType()->getPointerAddressSpace());
+}
+
 // Returns true if V is an address expression.
 // TODO: Currently, we consider only phi, bitcast, addrspacecast, and
 // getelementptr operators.
-static bool isAddressExpression(const Value &V) {
+static bool isAddressExpression(const Value &V, const DataLayout &DL,
+                                const TargetTransformInfo *TTI) {
   const Operator *Op = dyn_cast<Operator>(&V);
   if (!Op)
     return false;
@@ -241,6 +283,8 @@
     const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
     return II && II->getIntrinsicID() == Intrinsic::ptrmask;
   }
+  case Instruction::IntToPtr:
+    return isNoopPtrIntCastPair(Op, DL, TTI);
   default:
     return false;
   }
@@ -249,7 +293,9 @@
 // Returns the pointer operands of V.
 //
 // Precondition: V is an address expression.
-static SmallVector<Value *, 2> getPointerOperands(const Value &V) {
+static SmallVector<Value *, 2>
+getPointerOperands(const Value &V, const DataLayout &DL,
+                   const TargetTransformInfo *TTI) {
   const Operator &Op = cast<Operator>(V);
   switch (Op.getOpcode()) {
   case Instruction::PHI: {
@@ -269,6 +315,11 @@
            "unexpected intrinsic call");
     return {II.getArgOperand(0)};
   }
+  case Instruction::IntToPtr: {
+    assert(isNoopPtrIntCastPair(&Op, DL, TTI));
+    auto *P2I = cast<Operator>(Op.getOperand(0));
+    return {P2I->getOperand(0)};
+  }
   default:
     llvm_unreachable("Unexpected instruction type.");
   }
@@ -337,13 +388,13 @@
   // expressions.
   if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     // TODO: Look in non-address parts, like icmp operands.
-    if (isAddressExpression(*CE) && Visited.insert(CE).second)
+    if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
       PostorderStack.emplace_back(CE, false);
     return;
   }
 
-  if (isAddressExpression(*V) &&
+  if (isAddressExpression(*V, *DL, TTI) &&
       V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
     if (Visited.insert(V).second) {
       PostorderStack.emplace_back(V, false);
@@ -351,7 +402,7 @@
       Operator *Op = cast<Operator>(V);
       for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
         if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
-          if (isAddressExpression(*CE) && Visited.insert(CE).second)
+          if (isAddressExpression(*CE, *DL, TTI) && Visited.insert(CE).second)
             PostorderStack.emplace_back(CE, false);
         }
       }
@@ -407,6 +458,10 @@
     } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
       if (!ASC->getType()->isVectorTy())
         PushPtrOperand(ASC->getPointerOperand());
+    } else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
+      if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
+        PushPtrOperand(
+            cast<PtrToIntInst>(I2P->getOperand(0))->getPointerOperand());
     }
   }
 
@@ -423,7 +478,7 @@
     }
     // Otherwise, adds its operands to the stack and explores them.
     PostorderStack.back().setInt(true);
-    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
+    for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
       appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                    Visited);
     }
@@ -536,6 +591,14 @@
     assert(I->getType()->isPointerTy());
     return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
                               NewPointerOperands[2], "", nullptr, I);
+  case Instruction::IntToPtr: {
+    assert(isNoopPtrIntCastPair(cast<Operator>(I), *DL, TTI));
+    Value *Src = cast<Operator>(I->getOperand(0))->getOperand(0);
+    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+    if (Src->getType() != NewPtrType)
+      return new BitCastInst(Src, NewPtrType);
+    return Src;
+  }
   default:
     llvm_unreachable("Unexpected opcode");
   }
@@ -545,8 +608,9 @@
 // constant expression `CE` with its operands replaced as specified in
 // ValueWithNewAddrSpace.
 static Value *cloneConstantExprWithNewAddressSpace(
-    ConstantExpr *CE, unsigned NewAddrSpace,
-    const ValueToValueMapTy &ValueWithNewAddrSpace) {
+    ConstantExpr *CE, unsigned NewAddrSpace,
+    const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
+    const TargetTransformInfo *TTI) {
   Type *TargetType =
       CE->getType()->getPointerElementType()->getPointerTo(NewAddrSpace);
 
@@ -577,6 +641,13 @@
     }
   }
 
+  if (CE->getOpcode() == Instruction::IntToPtr) {
+    assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI));
+    Constant *Src = cast<ConstantExpr>(CE->getOperand(0))->getOperand(0);
+    assert(Src->getType()->getPointerAddressSpace() == NewAddrSpace);
+    return ConstantExpr::getBitCast(Src, TargetType);
+  }
+
   // Computes the operands of the new constant expression.
   bool IsNew = false;
   SmallVector<Constant *, 4> NewOperands;
@@ -594,7 +665,7 @@
     }
     if (auto CExpr = dyn_cast<ConstantExpr>(Operand))
       if (Value *NewOperand = cloneConstantExprWithNewAddressSpace(
-              CExpr, NewAddrSpace, ValueWithNewAddrSpace)) {
+              CExpr, NewAddrSpace, ValueWithNewAddrSpace, DL, TTI)) {
         IsNew = true;
         NewOperands.push_back(cast<Constant>(NewOperand));
         continue;
@@ -629,7 +700,7 @@
     const ValueToValueMapTy &ValueWithNewAddrSpace,
     SmallVectorImpl<const Use *> *UndefUsesToFix) const {
   // All values in Postorder are flat address expressions.
-  assert(isAddressExpression(*V) &&
+  assert(isAddressExpression(*V, *DL, TTI) &&
         V->getType()->getPointerAddressSpace() == FlatAddrSpace);
 
   if (Instruction *I = dyn_cast<Instruction>(V)) {
@@ -645,7 +716,7 @@
   }
 
   return cloneConstantExprWithNewAddressSpace(
-      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace);
+      cast<ConstantExpr>(V), NewAddrSpace, ValueWithNewAddrSpace, DL, TTI);
 }
 
 // Defines the join operation on the address space lattice (see the file header
@@ -669,6 +740,10 @@
     return false;
 
   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  DL = &F.getParent()->getDataLayout();
+
+  if (AssumeDefaultIsFlatAddressSpace)
+    FlatAddrSpace = 0;
 
   if (FlatAddrSpace == UninitializedAddressSpace) {
     FlatAddrSpace = TTI->getFlatAddressSpace();
@@ -773,7 +848,7 @@
     else
       NewAS = joinAddressSpaces(Src0AS, Src1AS);
   } else {
-    for (Value *PtrOperand : getPointerOperands(V)) {
+    for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
       auto I = InferredAddrSpace.find(PtrOperand);
       unsigned OperandAS = I != InferredAddrSpace.end() ?
        I->second : PtrOperand->getType()->getPointerAddressSpace();
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll
@@ -0,0 +1,101 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -infer-address-spaces %s | FileCheck -check-prefixes=COMMON,AMDGCN %s
+; RUN: opt -S -o - -infer-address-spaces -assume-default-is-flat-addrspace %s | FileCheck -check-prefixes=COMMON,NOTTI %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+
+; COMMON-LABEL: @noop_ptrint_pair(
+; AMDGCN-NEXT: store i32 0, i32 addrspace(1)* %{{.*}}
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
+; NOTTI-NEXT: %2 = inttoptr i64 %1 to i32*
+; NOTTI-NEXT: store i32 0, i32* %2
+; NOTTI-NEXT: ret void
+define void @noop_ptrint_pair(i32 addrspace(1)* %x.coerce) {
+  %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair(
+; AMDGCN-NEXT: ptrtoint i32 addrspace(3)* %{{.*}} to i64
+; AMDGCN-NEXT: inttoptr i64 %{{.*}} to i32*
+; AMDGCN-NEXT: store i32 0, i32* %{{.*}}
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: ptrtoint i32 addrspace(3)* %{{.*}} to i64
+; NOTTI-NEXT: inttoptr i64 %{{.*}} to i32*
+; NOTTI-NEXT: store i32 0, i32* %{{.*}}
+; NOTTI-NEXT: ret void
+define void @non_noop_ptrint_pair(i32 addrspace(3)* %x.coerce) {
+  %1 = ptrtoint i32 addrspace(3)* %x.coerce to i64
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair2(
+; AMDGCN-NEXT: ptrtoint i32 addrspace(1)* %{{.*}} to i32
+; AMDGCN-NEXT: inttoptr i32 %{{.*}} to i32*
+; AMDGCN-NEXT: store i32 0, i32* %{{.*}}
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: ptrtoint i32 addrspace(1)* %{{.*}} to i32
+; NOTTI-NEXT: inttoptr i32 %{{.*}} to i32*
+; NOTTI-NEXT: store i32 0, i32* %{{.*}}
+; NOTTI-NEXT: ret void
+define void @non_noop_ptrint_pair2(i32 addrspace(1)* %x.coerce) {
+  %1 = ptrtoint i32 addrspace(1)* %x.coerce to i32
+  %2 = inttoptr i32 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+@g = addrspace(1) global i32 0, align 4
+@l = addrspace(3) global i32 0, align 4
+
+; COMMON-LABEL: @noop_ptrint_pair_ce(
+; AMDGCN-NEXT: store i32 0, i32 addrspace(1)* @g
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+; NOTTI-NEXT: ret void
+define void @noop_ptrint_pair_ce() {
+  store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+  ret void
+}
+
+; COMMON-LABEL: @noop_ptrint_pair_ce2(
+; AMDGCN-NEXT: ret i32* addrspacecast (i32 addrspace(1)* @g to i32*)
+; NOTTI-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+define i32* @noop_ptrint_pair_ce2() {
+  ret i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
+}
+
+; COMMON-LABEL: @non_noop_ptrint_pair_ce(
+; AMDGCN-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+; AMDGCN-NEXT: ret void
+; NOTTI-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+; NOTTI-NEXT: ret void
+define void @non_noop_ptrint_pair_ce() {
+  store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
+  ret void
+}
void +} + +; COMMON-LABEL: @non_noop_ptrint_pair_ce2( +; AMDGCN-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*) +; NOTTI-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*) +define i32* @non_noop_ptrint_pair_ce2() { + ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*) +} + +; COMMON-LABEL: @non_noop_ptrint_pair_ce3( +; AMDGCN-NEXT: ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*) +; NOTTI-NEXT: ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*) +define i32* @non_noop_ptrint_pair_ce3() { + ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*) +} + +; COMMON-LABEL: @non_noop_ptrint_pair_ce4( +; AMDGCN-NEXT: ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*) +; NOTTI-NEXT: ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*) +define i32* @non_noop_ptrint_pair_ce4() { + ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*) +}