Index: clang/lib/Basic/Targets/AMDGPU.h =================================================================== --- clang/lib/Basic/Targets/AMDGPU.h +++ clang/lib/Basic/Targets/AMDGPU.h @@ -41,6 +41,7 @@ llvm::AMDGPU::GPUKind GPUKind; unsigned GPUFeatures; + unsigned WavefrontSize; /// Target ID is device name followed by optional feature name postfixed /// by plus or minus sign delimitted by colon, e.g. gfx908:xnack+:sram-ecc-. @@ -407,6 +408,8 @@ getAllPossibleTargetIDFeatures(getTriple(), getArchNameAMDGCN(GPUKind)); llvm::for_each(Features, [&](const auto &F) { assert(F.front() == '+' || F.front() == '-'); + if (F == "+wavefrontsize64") + WavefrontSize = 64; bool IsOn = F.front() == '+'; StringRef Name = StringRef(F).drop_front(); if (llvm::find(TargetIDFeatures, Name) == TargetIDFeatures.end()) Index: clang/lib/Basic/Targets/AMDGPU.cpp =================================================================== --- clang/lib/Basic/Targets/AMDGPU.cpp +++ clang/lib/Basic/Targets/AMDGPU.cpp @@ -316,6 +316,7 @@ HasLegalHalfType = true; HasFloat16 = true; + WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64; // Set pointer width and alignment for target address space 0. PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits(); @@ -388,6 +389,8 @@ Builder.defineMacro("__HAS_FP64__"); if (hasFastFMA()) Builder.defineMacro("FP_FAST_FMA"); + + Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize)); } void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) { Index: clang/lib/Driver/ToolChains/AMDGPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/AMDGPU.cpp +++ clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -390,16 +390,9 @@ } } - if (Args.getLastArg(options::OPT_mwavefrontsize64)) { - Features.push_back("-wavefrontsize16"); - Features.push_back("-wavefrontsize32"); + if (Args.hasFlag(options::OPT_mwavefrontsize64, + options::OPT_mno_wavefrontsize64, false)) Features.push_back("+wavefrontsize64"); - } - if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) { - Features.push_back("-wavefrontsize16"); - Features.push_back("+wavefrontsize32"); - Features.push_back("-wavefrontsize64"); - } handleTargetFeaturesGroup( Args, Features, options::OPT_m_amdgpu_Features_Group); Index: clang/test/Driver/amdgpu-features.c =================================================================== --- clang/test/Driver/amdgpu-features.c +++ clang/test/Driver/amdgpu-features.c @@ -25,10 +25,16 @@ // NO-SRAM-ECC: "-target-feature" "-sram-ecc" // RUN: %clang -### -target amdgcn-amdpal -mcpu=gfx1010 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s -// WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "-wavefrontsize32" "-target-feature" "+wavefrontsize64" +// RUN: %clang -### -target amdgcn-amdpal -mcpu=gfx1010 -mno-wavefrontsize64 -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefix=WAVE64 %s +// WAVE64: "-target-feature" "+wavefrontsize64" +// WAVE64-NOT: {{".*wavefrontsize16"}} +// WAVE64-NOT: {{".*wavefrontsize32"}} // RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=NO-WAVE64 %s -// NO-WAVE64: "-target-feature" "-wavefrontsize16" "-target-feature" "+wavefrontsize32" "-target-feature" "-wavefrontsize64" +// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=NO-WAVE64 %s +// NO-WAVE64-NOT: {{".*wavefrontsize16"}} +// NO-WAVE64-NOT: {{".*wavefrontsize32"}} +// NO-WAVE64-NOT: {{".*wavefrontsize64"}} // RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mcumode %s 2>&1 | FileCheck --check-prefix=CUMODE %s // CUMODE: "-target-feature" "+cumode" Index: clang/test/Driver/amdgpu-macros.cl =================================================================== --- clang/test/Driver/amdgpu-macros.cl +++ clang/test/Driver/amdgpu-macros.cl @@ -346,4 +346,42 @@ // GFX1011-DAG: #define __amdgcn_processor__ "gfx1011" // GFX1012-DAG: #define __amdgcn_processor__ "gfx1012" // GFX1030-DAG: #define __amdgcn_processor__ "gfx1030" -// GFX1031-DAG: #define __amdgcn_processor__ "gfx1031" \ No newline at end of file +// GFX1031-DAG: #define __amdgcn_processor__ "gfx1031" + +// GFX600-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX601-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX700-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX701-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX702-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX703-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX704-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX801-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX802-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX803-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX810-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX900-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX902-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX904-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX906-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX908-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX909-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// GFX1010-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 +// GFX1011-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 +// GFX1012-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 +// GFX1030-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 +// GFX1031-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 + +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mwavefrontsize64 \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mwavefrontsize64 \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mwavefrontsize64 \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE32 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx906 -mno-wavefrontsize64 \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 -mno-wavefrontsize64 \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 Index: clang/test/Driver/hip-macros.hip =================================================================== --- /dev/null +++ clang/test/Driver/hip-macros.hip @@ -0,0 +1,20 @@ +// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx906 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx1010 -mwavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mno-wavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE32 %s +// RUN: %clang -E -dM --offload-arch=gfx906 -mno-wavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// RUN: %clang -E -dM --offload-arch=gfx1010 -mno-wavefrontsize64 \ +// RUN: --cuda-device-only -nogpuinc -nogpulib \ +// RUN: -mwavefrontsize64 %s 2>&1 | FileCheck --check-prefixes=WAVE64 %s +// WAVE64-DAG: #define __AMDGCN_WAVEFRONT_SIZE 64 +// WAVE32-DAG: #define __AMDGCN_WAVEFRONT_SIZE 32 Index: clang/test/Driver/hip-toolchain-features.hip =================================================================== --- clang/test/Driver/hip-toolchain-features.hip +++ clang/test/Driver/hip-toolchain-features.hip @@ -37,8 +37,17 @@ // RUN: -mcumode -mcumode -mno-cumode -mwavefrontsize64 -mcumode \ // RUN: -mwavefrontsize64 -mno-wavefrontsize64 2>&1 \ // RUN: | FileCheck %s -check-prefix=DUP -// DUP: {{.*}}clang{{.*}} "-target-feature" "-wavefrontsize16" -// DUP-SAME: "-target-feature" "+wavefrontsize32" -// DUP-SAME: "-target-feature" "-wavefrontsize64" -// DUP-SAME: "-target-feature" "+cumode" -// DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=-wavefrontsize16,+wavefrontsize32,-wavefrontsize64,+cumode" +// DUP: {{.*}}clang{{.*}} "-target-feature" "+cumode" +// DUP-NOT: "-target-feature" "{{.*}}wavefrontsize16" +// DUP-NOT: "-target-feature" "{{.*}}wavefrontsize32" +// DUP-NOT: "-target-feature" "{{.*}}wavefrontsize64" +// DUP: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+cumode" + +// RUN: %clang -### -target x86_64-linux-gnu -fgpu-rdc -nogpulib \ +// RUN: --cuda-gpu-arch=gfx1010 %s \ +// RUN: -mno-wavefrontsize64 -mwavefrontsize64 2>&1 \ +// RUN: | FileCheck %s -check-prefix=WAVE64 +// WAVE64: {{.*}}clang{{.*}} "-target-feature" "+wavefrontsize64" +// WAVE64-NOT: "-target-feature" "{{.*}}wavefrontsize16" +// WAVE64-NOT: "-target-feature" "{{.*}}wavefrontsize32" +// WAVE64: {{.*}}lld{{.*}} "-plugin-opt=-mattr=+wavefrontsize64"