diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -18,7 +18,7 @@ // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce) // CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* // CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* -// OPT: [[VAL:%.*]] = load i32, i32 addrspace(4)* %x.coerce.const, align 4 +// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2 // OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1 // OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4 // OPT: ret void @@ -30,7 +30,7 @@ // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce) // CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* // CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* -// OPT: [[VAL:%.*]] = load i32, i32 addrspace(4)* %x.coerce.const, align 4 +// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2 // OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1 // OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4 // OPT: ret void @@ -68,8 +68,7 @@ // OPT: [[R1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1 // OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8 // OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)* -// OPT: [[G2:%.*]] ={{.*}} addrspacecast i32* [[P0]] to i32 addrspace(4)* -// OPT: [[V0:%.*]] = load i32, i32 addrspace(4)* [[G2]], align 4 +// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2 // OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1 // OPT: store i32 [[INC]], 
i32 addrspace(1)* [[G0]], align 4 // OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4 @@ -104,8 +103,7 @@ // OPT: [[R1:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 1 // OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8 // OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)* -// OPT: [[G2:%.*]] ={{.*}} addrspacecast float* [[P0]] to float addrspace(4)* -// OPT: [[V0:%.*]] = load float, float addrspace(4)* [[G2]], align 4 +// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2 // OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00 // OPT: store float [[ADD0]], float addrspace(1)* [[G0]], align 4 // OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4 @@ -132,7 +130,7 @@ // COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float addrspace(1)*{{.*}} %a.coerce) // CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* // CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]* -// OPT: [[VAL:%.*]] = load float, float addrspace(4)* %a.coerce.const, align 4 +// OPT: [[VAL:%.*]] = load float, float addrspace(1)* %a.coerce, align 4, !amdgpu.noclobber !2 // OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00 // OPT: store float [[INC]], float addrspace(1)* %a.coerce, align 4 // OPT: ret void diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp @@ -131,31 +131,7 @@ if (!LI->isSimple()) return false; - Value *Ptr = LI->getPointerOperand(); - - // Strip casts we have created earlier. 
- Value *OrigPtr = Ptr; - PointerType *PT; - for ( ; ; ) { - PT = cast<PointerType>(OrigPtr->getType()); - if (PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) - return false; - auto *P = dyn_cast<AddrSpaceCastInst>(OrigPtr); - if (!P) - break; - auto *NewPtr = P->getPointerOperand(); - if (!cast<PointerType>(NewPtr->getType())->hasSameElementTypeAs(PT)) - break; - OrigPtr = NewPtr; - } - - IRBuilder<> B(LI); - - PointerType *NewPT = - PointerType::getWithSamePointeeType(PT, AMDGPUAS::CONSTANT_ADDRESS); - Value *Cast = B.CreateAddrSpaceCast(OrigPtr, NewPT, - Twine(OrigPtr->getName(), ".const")); - LI->replaceUsesOfWith(Ptr, Cast); + LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {})); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll b/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll --- a/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll @@ -11,15 +11,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float**, float** addrspace(1)* [[ARG:%.*]], i32 [[I]] -; CHECK-NEXT: [[P1_CONST:%.*]] = addrspacecast float** addrspace(1)* [[P1]] to float** addrspace(4)* -; CHECK-NEXT: [[P2:%.*]] = load float**, float** addrspace(4)* [[P1_CONST]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float** [[P2]] to float* addrspace(1)* -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast float* addrspace(1)* [[TMP0]] to float** -; CHECK-NEXT: [[P2_FLAT:%.*]] = addrspacecast float* addrspace(1)* [[TMP0]] to float** -; CHECK-NEXT: [[P2_CONST:%.*]] = addrspacecast float** [[TMP1]] to float* addrspace(4)* -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(4)* [[P2_CONST]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[TMP2]], align 4 +; CHECK-NEXT: [[P2:%.*]] = load float**, float** addrspace(1)* [[P1]], align 8, 
!amdgpu.noclobber !0 +; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast float** [[P2]] to float* addrspace(1)* +; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(1)* [[P2_GLOBAL]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* +; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P3_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -41,11 +37,9 @@ ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG_GLOBAL]], i32 [[I]] ; CHECK-NEXT: [[P1_CAST:%.*]] = bitcast float* addrspace(1)* [[P1]] to i32* addrspace(1)* -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast i32* addrspace(1)* [[P1_CAST]] to i32** -; CHECK-NEXT: [[P1_CAST_CONST:%.*]] = addrspacecast i32** [[TMP0]] to i32* addrspace(4)* -; CHECK-NEXT: [[P2:%.*]] = load i32*, i32* addrspace(4)* [[P1_CAST_CONST]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* [[P2]] to i32 addrspace(1)* -; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[TMP1]], align 4 +; CHECK-NEXT: [[P2:%.*]] = load i32*, i32* addrspace(1)* [[P1_CAST]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast i32* [[P2]] to i32 addrspace(1)* +; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[P2_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -66,11 +60,10 @@ ; CHECK-LABEL: @ptr_in_struct( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], [[STRUCT_S]] addrspace(1)* [[ARG:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[P_CONST:%.*]] = addrspacecast float* addrspace(1)* [[P]] to float* addrspace(4)* -; CHECK-NEXT: [[P1:%.*]] = load float*, float* addrspace(4)* [[P_CONST]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float* [[P1]] to float addrspace(1)* +; CHECK-NEXT: [[P1:%.*]] = load float*, float* addrspace(1)* [[P]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P1_GLOBAL:%.*]] = addrspacecast float* 
[[P1]] to float addrspace(1)* ; CHECK-NEXT: [[ID:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP0]], i32 [[ID]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float addrspace(1)* [[P1_GLOBAL]], i32 [[ID]] ; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; @@ -97,26 +90,22 @@ ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG_GLOBAL]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float* addrspace(1)* [[ARRAYIDX10]] to float** -; CHECK-NEXT: [[ARRAYIDX10_CONST:%.*]] = addrspacecast float** [[TMP0]] to float* addrspace(4)* -; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(4)* [[ARRAYIDX10_CONST]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast float* [[I1]] to float addrspace(1)* -; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float addrspace(1)* [[TMP1]] to float* -; CHECK-NEXT: [[I1_CONST:%.*]] = addrspacecast float* [[TMP2]] to float addrspace(4)* -; CHECK-NEXT: [[I2:%.*]] = load float, float addrspace(4)* [[I1_CONST]], align 4 +; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(1)* [[ARRAYIDX10]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast float* [[I1]] to float addrspace(1)* +; CHECK-NEXT: [[I2:%.*]] = load float, float addrspace(1)* [[I1_GLOBAL]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[X:%.*]] ; CHECK-NEXT: store float [[I2]], float addrspace(3)* [[ARRAYIDX512]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP1]], i64 1 +; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float addrspace(1)* 
[[I1_GLOBAL]], i64 1 ; CHECK-NEXT: [[I3:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_1]], align 4 ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[X]], 1 ; CHECK-NEXT: [[ARRAYIDX512_1:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_1]] ; CHECK-NEXT: store float [[I3]], float addrspace(3)* [[ARRAYIDX512_1]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP1]], i64 2 +; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 2 ; CHECK-NEXT: [[I4:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_2]], align 4 ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[X]], 2 ; CHECK-NEXT: [[ARRAYIDX512_2:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_2]] ; CHECK-NEXT: store float [[I4]], float addrspace(3)* [[ARRAYIDX512_2]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP1]], i64 3 +; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 3 ; CHECK-NEXT: [[I5:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_3]], align 4 ; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[X]], 3 ; CHECK-NEXT: [[ARRAYIDX512_3:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_3]] @@ -125,12 +114,10 @@ ; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[SUB]] ; CHECK-NEXT: [[I6:%.*]] = load float, float addrspace(3)* [[ARRAYIDX711]], align 4 ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[OUT_GLOBAL]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP3:%.*]] = addrspacecast float* addrspace(1)* [[ARRAYIDX11]] to float** -; CHECK-NEXT: [[ARRAYIDX11_CONST:%.*]] = addrspacecast float** [[TMP3]] to float* addrspace(4)* -; CHECK-NEXT: [[I7:%.*]] = load float*, float* addrspace(4)* 
[[ARRAYIDX11_CONST]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast float* [[I7]] to float addrspace(1)* +; CHECK-NEXT: [[I7:%.*]] = load float*, float* addrspace(1)* [[ARRAYIDX11]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I7_GLOBAL:%.*]] = addrspacecast float* [[I7]] to float addrspace(1)* ; CHECK-NEXT: [[IDXPROM8:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP4]], i64 [[IDXPROM8]] +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I7_GLOBAL]], i64 [[IDXPROM8]] ; CHECK-NEXT: store float [[I6]], float addrspace(1)* [[ARRAYIDX9]], align 4 ; CHECK-NEXT: ret void ; @@ -178,25 +165,22 @@ ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ARRAYIDX10_CONST:%.*]] = addrspacecast float* addrspace(1)* [[ARRAYIDX10]] to float* addrspace(4)* -; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(4)* [[ARRAYIDX10_CONST]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float* [[I1]] to float addrspace(1)* -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast float addrspace(1)* [[TMP0]] to float* -; CHECK-NEXT: [[I1_CONST:%.*]] = addrspacecast float* [[TMP1]] to float addrspace(4)* -; CHECK-NEXT: [[I2:%.*]] = load float, float addrspace(4)* [[I1_CONST]], align 4 +; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(1)* [[ARRAYIDX10]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast float* [[I1]] to float addrspace(1)* +; CHECK-NEXT: [[I2:%.*]] = load float, float addrspace(1)* [[I1_GLOBAL]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[X:%.*]] ; CHECK-NEXT: store float [[I2]], float addrspace(3)* [[ARRAYIDX512]], align 4 -; 
CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP0]], i64 1 +; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 1 ; CHECK-NEXT: [[I3:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_1]], align 4 ; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[X]], 1 ; CHECK-NEXT: [[ARRAYIDX512_1:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_1]] ; CHECK-NEXT: store float [[I3]], float addrspace(3)* [[ARRAYIDX512_1]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP0]], i64 2 +; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 2 ; CHECK-NEXT: [[I4:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_2]], align 4 ; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[X]], 2 ; CHECK-NEXT: [[ARRAYIDX512_2:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_2]] ; CHECK-NEXT: store float [[I4]], float addrspace(3)* [[ARRAYIDX512_2]], align 4 -; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP0]], i64 3 +; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 3 ; CHECK-NEXT: [[I5:%.*]] = load float, float addrspace(1)* [[ARRAYIDX3_3]], align 4 ; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[X]], 3 ; CHECK-NEXT: [[ARRAYIDX512_3:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[ADD_3]] @@ -205,7 +189,7 @@ ; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[SUB]] ; CHECK-NEXT: [[I6:%.*]] = load float, float addrspace(3)* [[ARRAYIDX711]], align 4 ; CHECK-NEXT: [[IDXPROM8:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP0]], i64 [[IDXPROM8]] +; CHECK-NEXT: 
[[ARRAYIDX9:%.*]] = getelementptr inbounds float, float addrspace(1)* [[I1_GLOBAL]], i64 [[IDXPROM8]] ; CHECK-NEXT: store float [[I6]], float addrspace(1)* [[ARRAYIDX9]], align 4 ; CHECK-NEXT: ret void ; @@ -296,19 +280,18 @@ ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG:%.*]], i64 [[IDXPROM]] -; CHECK-NEXT: [[ARRAYIDX10_CONST:%.*]] = addrspacecast float* addrspace(1)* [[ARRAYIDX10]] to float* addrspace(4)* -; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(4)* [[ARRAYIDX10_CONST]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float* [[I1]] to float addrspace(1)* +; CHECK-NEXT: [[I1:%.*]] = load float*, float* addrspace(1)* [[ARRAYIDX10]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[I1_GLOBAL:%.*]] = addrspacecast float* [[I1]] to float addrspace(1)* ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARRAYIDX10]], i32 [[X:%.*]] ; CHECK-NEXT: store float* null, float* addrspace(1)* [[ARRAYIDX11]], align 4 -; CHECK-NEXT: [[I2:%.*]] = load float, float addrspace(1)* [[TMP0]], align 4 +; CHECK-NEXT: [[I2:%.*]] = load float, float addrspace(1)* [[I1_GLOBAL]], align 4 ; CHECK-NEXT: [[ARRAYIDX512:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[X]] ; CHECK-NEXT: store float [[I2]], float addrspace(3)* [[ARRAYIDX512]], align 4 ; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[X]], -1 ; CHECK-NEXT: [[ARRAYIDX711:%.*]] = getelementptr inbounds [4 x float], [4 x float] addrspace(3)* @LDS, i32 0, i32 [[SUB]] ; CHECK-NEXT: [[I6:%.*]] = load float, float addrspace(3)* [[ARRAYIDX711]], align 4 ; CHECK-NEXT: [[IDXPROM8:%.*]] = sext i32 [[X]] to i64 -; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float addrspace(1)* [[TMP0]], i64 [[IDXPROM8]] +; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float 
addrspace(1)* [[I1_GLOBAL]], i64 [[IDXPROM8]] ; CHECK-NEXT: store float [[I6]], float addrspace(1)* [[ARRAYIDX9]], align 4 ; CHECK-NEXT: ret void ; @@ -340,15 +323,11 @@ ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float**, float** addrspace(1)* [[ARG:%.*]], i32 [[I]] ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() -; CHECK-NEXT: [[P1_CONST:%.*]] = addrspacecast float** addrspace(1)* [[P1]] to float** addrspace(4)* -; CHECK-NEXT: [[P2:%.*]] = load float**, float** addrspace(4)* [[P1_CONST]], align 8 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float** [[P2]] to float* addrspace(1)* -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast float* addrspace(1)* [[TMP0]] to float** -; CHECK-NEXT: [[P2_FLAT:%.*]] = addrspacecast float* addrspace(1)* [[TMP0]] to float** -; CHECK-NEXT: [[P2_CONST:%.*]] = addrspacecast float** [[TMP1]] to float* addrspace(4)* -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(4)* [[P2_CONST]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[TMP2]], align 4 +; CHECK-NEXT: [[P2:%.*]] = load float**, float** addrspace(1)* [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast float** [[P2]] to float* addrspace(1)* +; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(1)* [[P2_GLOBAL]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* +; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P3_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -370,11 +349,9 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARG_GLOBAL:%.*]] = addrspacecast float** [[ARG:%.*]] to float* addrspace(1)* ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float*, float* addrspace(1)* [[ARG_GLOBAL]], i32 [[I:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float* addrspace(1)* [[P1]] to float** 
-; CHECK-NEXT: [[P1_CONST:%.*]] = addrspacecast float** [[TMP0]] to float* addrspace(4)* -; CHECK-NEXT: [[P2:%.*]] = load float*, float* addrspace(4)* [[P1_CONST]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast float* [[P2]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[TMP1]], align 4 +; CHECK-NEXT: [[P2:%.*]] = load float*, float* addrspace(1)* [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P2_GLOBAL:%.*]] = addrspacecast float* [[P2]] to float addrspace(1)* +; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P2_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -393,8 +370,8 @@ ; CHECK-LABEL: @const_ptr_nest_3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[ARG:%.*]], i32 [[I:%.*]] -; CHECK-NEXT: [[P2:%.*]] = load float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[P1]], align 8 -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(4)* [[P2]], align 8 +; CHECK-NEXT: [[P2:%.*]] = load float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(4)* [[P2]], align 8, !amdgpu.noclobber !0 ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* ; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[TMP0]], align 4 ; CHECK-NEXT: ret void @@ -416,8 +393,8 @@ ; CHECK-LABEL: @cast_from_const_const_ptr_nest_3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[ARG:%.*]], i32 [[I:%.*]] -; CHECK-NEXT: [[P2:%.*]] = load float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[P1]], align 8 -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(4)* [[P2]], align 8 +; CHECK-NEXT: [[P2:%.*]] = load float* addrspace(4)*, float* addrspace(4)* addrspace(4)* [[P1]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3:%.*]] 
= load float*, float* addrspace(4)* [[P2]], align 8, !amdgpu.noclobber !0 ; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* ; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P3_GLOBAL]], align 4 ; CHECK-NEXT: ret void @@ -485,15 +462,11 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float* addrspace(1)*, float* addrspace(1)* addrspace(1)* [[ARG:%.*]], i32 [[I:%.*]] ; CHECK-NEXT: [[A1:%.*]] = bitcast float* addrspace(1)* addrspace(1)* [[P1]] to i32* addrspace(1)* addrspace(1)* -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast float* addrspace(1)* addrspace(1)* [[P1]] to i32* addrspace(1)** -; CHECK-NEXT: [[A1_CONST:%.*]] = addrspacecast i32* addrspace(1)** [[TMP0]] to i32* addrspace(1)* addrspace(4)* -; CHECK-NEXT: [[P2:%.*]] = load i32* addrspace(1)*, i32* addrspace(1)* addrspace(4)* [[A1_CONST]], align 8 +; CHECK-NEXT: [[P2:%.*]] = load i32* addrspace(1)*, i32* addrspace(1)* addrspace(1)* [[A1]], align 8, !amdgpu.noclobber !0 ; CHECK-NEXT: [[A2:%.*]] = bitcast i32* addrspace(1)* [[P2]] to float* addrspace(1)* -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i32* addrspace(1)* [[P2]] to float** -; CHECK-NEXT: [[A2_CONST:%.*]] = addrspacecast float** [[TMP1]] to float* addrspace(4)* -; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(4)* [[A2_CONST]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* -; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[TMP2]], align 4 +; CHECK-NEXT: [[P3:%.*]] = load float*, float* addrspace(1)* [[A2]], align 8, !amdgpu.noclobber !0 +; CHECK-NEXT: [[P3_GLOBAL:%.*]] = addrspacecast float* [[P3]] to float addrspace(1)* +; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* [[P3_GLOBAL]], align 4 ; CHECK-NEXT: ret void ; entry: