Index: clang/lib/Sema/SemaDeclAttr.cpp =================================================================== --- clang/lib/Sema/SemaDeclAttr.cpp +++ clang/lib/Sema/SemaDeclAttr.cpp @@ -4297,7 +4297,7 @@ // extern __shared__ is only allowed on arrays with no length (e.g. // "int x[]"). if (!S.getLangOpts().GPURelocatableDeviceCode && VD->hasExternalStorage() && - !isa<IncompleteArrayType>(VD->getType())) { + !isa<IncompleteArrayType>(VD->getType()) && !S.getLangOpts().HIP) { S.Diag(AL.getLoc(), diag::err_cuda_extern_shared) << VD; return; } Index: clang/test/CodeGenCUDA/extern-shared.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/extern-shared.cu @@ -0,0 +1,35 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - -fcuda-is-device \ +// RUN: -target-cpu gfx906 -x hip %s | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: @global = external addrspace(3) global i32, align 4 +// CHECK: @global_arr = external addrspace(3) global [0 x i32], align 4 +// CHECK: @global_arr1 = external addrspace(3) global [1 x i32], align 4 +// CHECK: @global_ptr = external addrspace(3) global i32*, align 8 +// CHECK: @x = external addrspace(3) global i32, align 4 +// CHECK: @arr = external addrspace(3) global [0 x i32], align 4 +// CHECK: @arr1 = external addrspace(3) global [1 x i32], align 4 +// CHECK: @ptr = external addrspace(3) global i32*, align 8 + +extern __shared__ int global; +extern __shared__ int global_arr[]; +extern __shared__ int global_arr1[1]; +extern __shared__ int* global_ptr; + +__global__ void foo() { + extern __shared__ int x; + extern __shared__ int arr[]; + extern __shared__ int arr1[1]; + extern __shared__ int* ptr; + global = 1; + global_arr[0] = 1; + global_arr1[0] = 1; + *global_ptr = 1; + x = 1; + arr[0] = 1; + arr1[0] = 1; + *ptr = 1; +} + Index: clang/test/SemaCUDA/extern-shared.cu =================================================================== --- 
clang/test/SemaCUDA/extern-shared.cu +++ clang/test/SemaCUDA/extern-shared.cu @@ -1,11 +1,16 @@ // RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -verify %s // RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fcuda-is-device -verify %s +// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -verify=hip -x hip %s +// RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fcuda-is-device \ +// RUN: -verify=hip -x hip %s + // RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fgpu-rdc -verify=rdc %s // RUN: %clang_cc1 -fsyntax-only -Wundefined-internal -fcuda-is-device -fgpu-rdc -verify=rdc %s // Most of these declarations are fine in separate compilation mode. +// hip-no-diagnostics #include "Inputs/cuda.h" __device__ void foo() {