Index: clang/include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- clang/include/clang/Basic/BuiltinsAMDGPU.def +++ clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -120,12 +120,6 @@ BUILTIN(__builtin_amdgcn_s_incperflevel, "vIi", "n") BUILTIN(__builtin_amdgcn_s_decperflevel, "vIi", "n") BUILTIN(__builtin_amdgcn_s_setprio, "vIs", "n") -BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc") -BUILTIN(__builtin_amdgcn_uicmpl, "WUiWUiWUiIi", "nc") -BUILTIN(__builtin_amdgcn_sicmp, "WUiiiIi", "nc") -BUILTIN(__builtin_amdgcn_sicmpl, "WUiWiWiIi", "nc") -BUILTIN(__builtin_amdgcn_fcmp, "WUiddIi", "nc") -BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc") BUILTIN(__builtin_amdgcn_ds_swizzle, "iiIi", "nc") BUILTIN(__builtin_amdgcn_ds_permute, "iii", "nc") BUILTIN(__builtin_amdgcn_ds_bpermute, "iii", "nc") @@ -155,6 +149,21 @@ BUILTIN(__builtin_amdgcn_mqsad_pk_u16_u8, "WUiWUiUiWUi", "nc") BUILTIN(__builtin_amdgcn_mqsad_u32_u8, "V4UiWUiUiV4Ui", "nc") +//===----------------------------------------------------------------------===// +// Ballot builtins. +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_amdgcn_ballot_w32, "Uib", "nc", "wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_ballot_w64, "LUib", "nc", "wavefrontsize64") + +// Deprecated intrinsics in favor of __builtin_amdgcn_ballot_{w32|w64} +BUILTIN(__builtin_amdgcn_uicmp, "WUiUiUiIi", "nc") +BUILTIN(__builtin_amdgcn_uicmpl, "WUiWUiWUiIi", "nc") +BUILTIN(__builtin_amdgcn_sicmp, "WUiiiIi", "nc") +BUILTIN(__builtin_amdgcn_sicmpl, "WUiWiWiIi", "nc") +BUILTIN(__builtin_amdgcn_fcmp, "WUiddIi", "nc") +BUILTIN(__builtin_amdgcn_fcmpf, "WUiffIi", "nc") + //===----------------------------------------------------------------------===// // CI+ only builtins. 
//===----------------------------------------------------------------------===// Index: clang/lib/Basic/Targets/AMDGPU.cpp =================================================================== --- clang/lib/Basic/Targets/AMDGPU.cpp +++ clang/lib/Basic/Targets/AMDGPU.cpp @@ -13,6 +13,7 @@ #include "AMDGPU.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CodeGenOptions.h" +#include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/MacroBuilder.h" #include "clang/Basic/TargetBuiltins.h" @@ -178,6 +179,8 @@ bool AMDGPUTargetInfo::initFeatureMap( llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeatureVec) const { + const bool IsNullCPU = CPU.empty(); + bool IsWave32Capable = false; using namespace llvm::AMDGPU; @@ -188,6 +191,7 @@ case GK_GFX1102: case GK_GFX1101: case GK_GFX1100: + IsWave32Capable = true; Features["ci-insts"] = true; Features["dot1-insts"] = true; Features["dot5-insts"] = true; @@ -211,6 +215,7 @@ case GK_GFX1032: case GK_GFX1031: case GK_GFX1030: + IsWave32Capable = true; Features["ci-insts"] = true; Features["dot1-insts"] = true; Features["dot2-insts"] = true; @@ -238,6 +243,7 @@ [[fallthrough]]; case GK_GFX1013: case GK_GFX1010: + IsWave32Capable = true; Features["dl-insts"] = true; Features["ci-insts"] = true; Features["flat-address-space"] = true; @@ -334,7 +340,32 @@ } } - return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec); + if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec)) + return false; + + // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets. 
+ const bool HaveWave32 = + (IsWave32Capable || IsNullCPU) && Features.count("wavefrontsize32"); + const bool HaveWave64 = Features.count("wavefrontsize64"); + + // TODO: Should move this logic into TargetParser + if (HaveWave32 && HaveWave64) { + Diags.Report(diag::err_invalid_feature_combination) + << "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"; + return false; + } + + // Don't assume any wavesize with an unknown subtarget. + if (!IsNullCPU) { + // Default to wave32 if available, or wave64 if not + if (!HaveWave32 && !HaveWave64) { + StringRef DefaultWaveSizeFeature = + IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64"; + Features.insert(std::make_pair(DefaultWaveSizeFeature, true)); + } + } + + return true; } void AMDGPUTargetInfo::fillValidCPUList( Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -16948,6 +16948,13 @@ return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe); case AMDGPU::BI__builtin_amdgcn_sbfe: return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe); + case AMDGPU::BI__builtin_amdgcn_ballot_w32: + case AMDGPU::BI__builtin_amdgcn_ballot_w64: { + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Value *Src = EmitScalarExpr(E->getArg(0)); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType }); + return Builder.CreateCall(F, { Src }); + } case AMDGPU::BI__builtin_amdgcn_uicmp: case AMDGPU::BI__builtin_amdgcn_uicmpl: case AMDGPU::BI__builtin_amdgcn_sicmp: Index: clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/amdgpu-features-illegal.cl @@ -0,0 +1,6 @@ +// RUN: not %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -triple amdgcn -target-cpu 
gfx1103 -target-feature +wavefrontsize32 -target-feature +wavefrontsize64 -o /dev/null %s 2>&1 | FileCheck %s + +// CHECK: error: invalid feature combination: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive + +kernel void test() {} Index: clang/test/CodeGenOpenCL/amdgpu-features.cl =================================================================== --- clang/test/CodeGenOpenCL/amdgpu-features.cl +++ clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -3,6 +3,10 @@ // Check that appropriate features are defined for every supported AMDGPU // "-target" and "-mcpu" options. +// RUN: %clang_cc1 -triple amdgcn -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s +// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE32 %s +// RUN: %clang_cc1 -triple amdgcn -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU-WAVE64 %s + // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx600 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX600 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx601 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX601 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx602 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX602 %s @@ -42,43 +46,50 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1102 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1102 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103 %s -// GFX600: "target-features"="+s-memtime-inst" -// GFX601: "target-features"="+s-memtime-inst" -// GFX602: "target-features"="+s-memtime-inst" -// GFX700: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst" -// GFX701: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst" -// GFX702: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst" -// GFX703: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst" -// GFX704: 
"target-features"="+ci-insts,+flat-address-space,+s-memtime-inst" -// GFX705: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst" -// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst" -// GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst" -// GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst" -// GFX805: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst" -// GFX810: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst" -// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" -// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" -// GFX90C: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst" -// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1033: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1036: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" -// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" -// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" -// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" -// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts" +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1103 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1103-W64 %s + +// NOCPU-NOT: "target-features" +// NOCPU-WAVE32: 
"target-features"="+wavefrontsize32" +// NOCPU-WAVE64: "target-features"="+wavefrontsize64" + +// GFX600: "target-features"="+s-memtime-inst,+wavefrontsize64" +// GFX601: "target-features"="+s-memtime-inst,+wavefrontsize64" +// GFX602: "target-features"="+s-memtime-inst,+wavefrontsize64" +// GFX700: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64" +// GFX701: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64" +// GFX702: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64" +// GFX703: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64" +// GFX704: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64" +// GFX705: "target-features"="+ci-insts,+flat-address-space,+s-memtime-inst,+wavefrontsize64" +// GFX801: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX802: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX803: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX805: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX810: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX904: 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" +// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1012: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1013: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1035: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1036: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" +// GFX1100: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1101: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1102: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1103: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" +// GFX1103-W64: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" kernel void test() {} Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl @@ -37,3 +37,10 @@ { *out = __builtin_amdgcn_groupstaticsize(); } + +// CHECK-LABEL: @test_ballot_wave32( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) +void test_ballot_wave32(global uint* out, int a, int b) +{ + *out = __builtin_amdgcn_ballot_w32(a == b); +} Index: clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl 
=================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -D__AMDGCN_WAVEFRONT_SIZE=32 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize32 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s + +typedef unsigned int uint; + + +// CHECK-LABEL: @test_ballot_wave32( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) +void test_ballot_wave32(global uint* out, int a, int b) +{ + *out = __builtin_amdgcn_ballot_w32(a == b); +} + +// CHECK-LABEL: @test_ballot_wave32_target_attr( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) +__attribute__((target("wavefrontsize32"))) +void test_ballot_wave32_target_attr(global uint* out, int a, int b) +{ + *out = __builtin_amdgcn_ballot_w32(a == b); +} + +#if __AMDGCN_WAVEFRONT_SIZE != 32 +#error Wrong wavesize detected +#endif Index: clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx900 
-target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -target-feature +wavefrontsize64 -S -emit-llvm -o - %s | FileCheck -enable-var-scope %s + +typedef unsigned long ulong; + +// CHECK-LABEL: @test_ballot_wave64( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}}) +void test_ballot_wave64(global ulong* out, int a, int b) +{ + *out = __builtin_amdgcn_ballot_w64(a == b); +} + +// CHECK-LABEL: @test_ballot_wave64_target_attr( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}}) +__attribute__((target("wavefrontsize64"))) +void test_ballot_wave64_target_attr(global ulong* out, int a, int b) +{ + *out = __builtin_amdgcn_ballot_w64(a == b); +} + +#if __AMDGCN_WAVEFRONT_SIZE != 64 +#error Wrong wavesize detected +#endif Index: clang/test/OpenMP/amdgcn-attributes.cpp =================================================================== --- clang/test/OpenMP/amdgcn-attributes.cpp +++ clang/test/OpenMP/amdgcn-attributes.cpp @@ -33,11 +33,11 @@ } // DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } -// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" "uniform-work-group-size"="true" } +// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" 
"target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" } // NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "kernel" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } // UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" } // DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" } +// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" } // NOIEEE: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } // UNSAFEATOMIC: attributes #1 = { convergent mustprogress noinline nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } Index: clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl 
=================================================================== --- /dev/null +++ clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature +wavefrontsize64 -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify -S -o - %s + +typedef unsigned int uint; + +void test_ballot_wave32(global uint* out, int a, int b) { + *out = __builtin_amdgcn_ballot_w32(a == b); // expected-error {{'__builtin_amdgcn_ballot_w32' needs target feature wavefrontsize32}} +} + +// FIXME: Should error for subtargets that don't support wave32 +__attribute__((target("wavefrontsize32"))) +void test_ballot_wave32_target_attr(global uint* out, int a, int b) { + *out = __builtin_amdgcn_ballot_w32(a == b); +} Index: clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl =================================================================== --- /dev/null +++ clang/test/SemaOpenCL/builtins-amdgcn-error-wave64.cl @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-feature +wavefrontsize32 -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize32 -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize64 -verify -S -o - %s +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify -S -o - %s + +typedef unsigned long ulong; + +void test_ballot_wave64(global ulong* out, int a, int b) { + *out = __builtin_amdgcn_ballot_w64(a == b); // expected-error {{'__builtin_amdgcn_ballot_w64' needs target feature wavefrontsize64}} +} + 
+__attribute__((target("wavefrontsize64"))) +void test_ballot_wave64_target_attr(global ulong* out, int a, int b) { + *out = __builtin_amdgcn_ballot_w64(a == b); +}