// Summary of this patch:
// - clang/lib/Basic/Targets/AMDGPU.cpp: AMDGPUTargetInfo::initFeatureMap drops
//   its per-GPU feature switch and its wave-size defaulting code; it now calls
//   fillAMDGPUFeatureMap and insertWaveSizeFeature and reports the returned
//   ErrorMsg through diag::err_invalid_feature_combination.
// - clang/lib/Driver/ToolChains/CommonArgs.cpp: before returning "", the
//   processor is taken from the last OPT_march_EQ argument via
//   getProcessorFromTargetID.
// - clang/lib/Driver/ToolChains/Flang.cpp: adds llvm::Triple::r600 and
//   llvm::Triple::amdgcn to the existing case list of the switch on
//   TC.getArch().
// - flang/lib/Frontend/FrontendActions.cpp: the feature string handed to
//   createTargetMachine now comes from getTargetFeatures(ci). For AMDGPU
//   triples it is the implicit GPU feature map, overlaid with the features
//   written by the user, plus the wave size feature; for every other triple it
//   is the user-written features joined with ",".
// - flang/test/Driver/target-cpu-features*.f90: add AMDGPU RUN/CHECK lines.
// - llvm TargetParser.h/.cpp: declare and define llvm::AMDGPU::
//   fillAMDGPUFeatureMap and insertWaveSizeFeature, plus the file-local helper
//   isWave32Capable.
//
// Behaviour of insertWaveSizeFeature as written below:
// - "wavefrontsize32" only counts as present when the GPU is wave32-capable or
//   the GPU name is empty; if it counts and "wavefrontsize64" is present too,
//   ErrorMsg is set and false is returned.
// - With a non-empty GPU name and neither feature counted as present, the
//   default is inserted: "wavefrontsize32" for the GPUs listed in
//   isWave32Capable, "wavefrontsize64" otherwise.
// - With an empty GPU name no wave size feature is inserted.
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -179,197 +179,19 @@ bool AMDGPUTargetInfo::initFeatureMap( llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeatureVec) const { - const bool IsNullCPU = CPU.empty(); - bool IsWave32Capable = false; using namespace llvm::AMDGPU; - - // XXX - What does the member GPU mean if device name string passed here? - if (isAMDGCN(getTriple())) { - switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { - case GK_GFX1103: - case GK_GFX1102: - case GK_GFX1101: - case GK_GFX1100: - IsWave32Capable = true; - Features["ci-insts"] = true; - Features["dot5-insts"] = true; - Features["dot7-insts"] = true; - Features["dot8-insts"] = true; - Features["dot9-insts"] = true; - Features["dot10-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["gfx11-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - break; - case GK_GFX1036: - case GK_GFX1035: - case GK_GFX1034: - case GK_GFX1033: - case GK_GFX1032: - case GK_GFX1031: - case GK_GFX1030: - IsWave32Capable = true; - Features["ci-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - Features["dl-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["gfx10-3-insts"] = true; - Features["s-memrealtime"] = true; - Features["s-memtime-inst"] = true; - break; - case GK_GFX1012: - case GK_GFX1011: - Features["dot1-insts"] = true; - 
Features["dot2-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - [[fallthrough]]; - case GK_GFX1013: - case GK_GFX1010: - IsWave32Capable = true; - Features["dl-insts"] = true; - Features["ci-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["gfx8-insts"] = true; - Features["gfx9-insts"] = true; - Features["gfx10-insts"] = true; - Features["s-memrealtime"] = true; - Features["s-memtime-inst"] = true; - break; - case GK_GFX940: - Features["gfx940-insts"] = true; - Features["fp8-insts"] = true; - Features["atomic-ds-pk-add-16-insts"] = true; - Features["atomic-flat-pk-add-16-insts"] = true; - Features["atomic-global-pk-add-bf16-inst"] = true; - [[fallthrough]]; - case GK_GFX90A: - Features["gfx90a-insts"] = true; - Features["atomic-buffer-global-pk-add-f16-insts"] = true; - Features["atomic-fadd-rtn-insts"] = true; - [[fallthrough]]; - case GK_GFX908: - Features["dot3-insts"] = true; - Features["dot4-insts"] = true; - Features["dot5-insts"] = true; - Features["dot6-insts"] = true; - Features["mai-insts"] = true; - [[fallthrough]]; - case GK_GFX906: - Features["dl-insts"] = true; - Features["dot1-insts"] = true; - Features["dot2-insts"] = true; - Features["dot7-insts"] = true; - Features["dot10-insts"] = true; - [[fallthrough]]; - case GK_GFX90C: - case GK_GFX909: - case GK_GFX904: - case GK_GFX902: - case GK_GFX900: - Features["gfx9-insts"] = true; - [[fallthrough]]; - case GK_GFX810: - case GK_GFX805: - case GK_GFX803: - case GK_GFX802: - case GK_GFX801: - Features["gfx8-insts"] = true; - Features["16-bit-insts"] = true; - Features["dpp"] = true; - Features["s-memrealtime"] = true; - [[fallthrough]]; - case GK_GFX705: - case GK_GFX704: - case GK_GFX703: - case GK_GFX702: - case GK_GFX701: - case GK_GFX700: - Features["ci-insts"] = true; - [[fallthrough]]; - case GK_GFX602: - case GK_GFX601: - case GK_GFX600: - 
Features["s-memtime-inst"] = true; - break; - case GK_NONE: - break; - default: - llvm_unreachable("Unhandled GPU!"); - } - } else { - if (CPU.empty()) - CPU = "r600"; - - switch (llvm::AMDGPU::parseArchR600(CPU)) { - case GK_CAYMAN: - case GK_CYPRESS: - case GK_RV770: - case GK_RV670: - // TODO: Add fp64 when implemented. - break; - case GK_TURKS: - case GK_CAICOS: - case GK_BARTS: - case GK_SUMO: - case GK_REDWOOD: - case GK_JUNIPER: - case GK_CEDAR: - case GK_RV730: - case GK_RV710: - case GK_RS880: - case GK_R630: - case GK_R600: - break; - default: - llvm_unreachable("Unhandled GPU!"); - } - } - + fillAMDGPUFeatureMap(CPU, getTriple(), Features); if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec)) return false; - // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets. - const bool HaveWave32 = - (IsWave32Capable || IsNullCPU) && Features.count("wavefrontsize32"); - const bool HaveWave64 = Features.count("wavefrontsize64"); - // TODO: Should move this logic into TargetParser - if (HaveWave32 && HaveWave64) { - Diags.Report(diag::err_invalid_feature_combination) - << "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"; + std::string ErrorMsg; + if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) { + Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg; return false; } - // Don't assume any wavesize with an unknown subtarget. - if (!IsNullCPU) { - // Default to wave32 if available, or wave64 if not - if (!HaveWave32 && !HaveWave64) { - StringRef DefaultWaveSizeFeature = - IsWave32Capable ? 
"wavefrontsize32" : "wavefrontsize64"; - Features.insert(std::make_pair(DefaultWaveSizeFeature, true)); - } - } - return true; } diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -331,6 +331,9 @@ .Case("aruba", "cayman") .Default(GPUName.str()); } + if (Arg *A = Args.getLastArg(options::OPT_march_EQ)) { + return getProcessorFromTargetID(T, A->getValue()).str(); + } return ""; } diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -107,6 +107,8 @@ switch (TC.getArch()) { default: break; + case llvm::Triple::r600: + case llvm::Triple::amdgcn: case llvm::Triple::aarch64: case llvm::Triple::riscv64: case llvm::Triple::x86_64: diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -60,6 +60,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/TargetParser/TargetParser.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include @@ -131,6 +132,60 @@ (generateRtTypeTables() || true); } +// Get the feature string which combines the implicit target features +// of the AMD GPU with the target features specified by the user +static std::string +getExplicitAndImplicitAMDGPUTargetFeatures(CompilerInstance &ci, + const TargetOptions &targetOpts, + const llvm::Triple triple) { + llvm::StringRef cpu = targetOpts.cpu; + llvm::StringMap implicitFeaturesMap; + std::string errorMsg; + // Get the set of implicit target features + llvm::AMDGPU::fillAMDGPUFeatureMap(cpu, triple, implicitFeaturesMap); + + // Add target features specified by the user (they override implicit ones) + 
for (auto &userFeature : 
targetOpts.featuresAsWritten) { + std::string userKeyString = userFeature.substr(1); + implicitFeaturesMap[userKeyString] = (userFeature[0] == '+'); + } + + if (!llvm::AMDGPU::insertWaveSizeFeature(cpu, triple, implicitFeaturesMap, + errorMsg)) { + unsigned diagID = ci.getDiagnostics().getCustomDiagID( + clang::DiagnosticsEngine::Error, "Unsupported feature ID: %0"); + ci.getDiagnostics().Report(diagID) << errorMsg.data(); + return std::string(); + } + + llvm::SmallVector featuresVec; + for (auto &implicitFeatureItem : implicitFeaturesMap) { + featuresVec.push_back((llvm::Twine(implicitFeatureItem.second ? "+" : "-") + + implicitFeatureItem.first().str()) + .str()); + } + + return llvm::join(featuresVec, ","); +} + +// Produces the comma-separated string of target features +static std::string getTargetFeatures(CompilerInstance &ci) { + const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts(); + const llvm::Triple triple(targetOpts.triple); + + // Clang does not append all target features to the clang -cc1 invocation. + // Some target features are parsed implicitly by clang::TargetInfo child + // class. clang::TargetInfo classes are the basic clang classes and + // they cannot be reused by Flang. 
+ // That's why we need to extract implicit target features and add + // them to the target features specified by the user + if (triple.isAMDGPU()) { + return getExplicitAndImplicitAMDGPUTargetFeatures(ci, targetOpts, triple); + } + return llvm::join(targetOpts.featuresAsWritten.begin(), + targetOpts.featuresAsWritten.end(), ","); +} + static void setMLIRDataLayout(mlir::ModuleOp &mlirModule, const llvm::DataLayout &dl) { mlir::MLIRContext *context = mlirModule.getContext(); @@ -671,8 +726,7 @@ llvm::CodeGenOpt::getLevel(CGOpts.OptimizationLevel); assert(OptLevelOrNone && "Invalid optimization level!"); llvm::CodeGenOpt::Level OptLevel = *OptLevelOrNone; - std::string featuresStr = llvm::join(targetOpts.featuresAsWritten.begin(), - targetOpts.featuresAsWritten.end(), ","); + std::string featuresStr = getTargetFeatures(ci); tm.reset(theTarget->createTargetMachine( theTriple, /*CPU=*/targetOpts.cpu, /*Features=*/featuresStr, llvm::TargetOptions(), diff --git a/flang/test/Driver/target-cpu-features-invalid.f90 b/flang/test/Driver/target-cpu-features-invalid.f90 --- a/flang/test/Driver/target-cpu-features-invalid.f90 +++ b/flang/test/Driver/target-cpu-features-invalid.f90 @@ -1,4 +1,4 @@ -! REQUIRES: aarch64-registered-target +! REQUIRES: aarch64-registered-target, amdgpu-registered-target ! Test that invalid cpu and features are ignored. @@ -8,6 +8,9 @@ ! RUN: %flang_fc1 -triple aarch64-linux-gnu -target-feature +superspeed \ ! RUN: -o /dev/null -S %s 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FEATURE +! RUN: not %flang_fc1 -triple amdgcn-amd-amdhsa -target-feature +wavefrontsize32 \ +! RUN: -target-feature +wavefrontsize64 -o /dev/null -S %s 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-WAVEFRONT ! CHECK-INVALID-CPU: 'supercpu' is not a recognized processor for this target (ignoring processor) ! CHECK-INVALID-FEATURE: '+superspeed' is not a recognized feature for this target (ignoring feature) +! 
CHECK-INVALID-WAVEFRONT: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive diff --git a/flang/test/Driver/target-cpu-features.f90 b/flang/test/Driver/target-cpu-features.f90 --- a/flang/test/Driver/target-cpu-features.f90 +++ b/flang/test/Driver/target-cpu-features.f90 @@ -23,6 +23,11 @@ ! RUN: %flang --target=riscv64-linux-gnu -c %s -### 2>&1 \ ! RUN: | FileCheck %s -check-prefix=CHECK-RV64 +! RUN: %flang --target=amdgcn-amd-amdhsa -mcpu=gfx908 -c %s -### 2>&1 \ +! RUN: | FileCheck %s -check-prefix=CHECK-AMDGPU + +! RUN: %flang --target=r600-unknown-unknown -mcpu=cayman -c %s -### 2>&1 \ +! RUN: | FileCheck %s -check-prefix=CHECK-AMDGPU-R600 ! CHECK-A57: "-fc1" "-triple" "aarch64-unknown-linux-gnu" ! CHECK-A57-SAME: "-target-cpu" "cortex-a57" "-target-feature" "+v8a" "-target-feature" "+aes" "-target-feature" "+crc" "-target-feature" "+fp-armv8" "-target-feature" "+sha2" "-target-feature" "+neon" @@ -46,3 +51,9 @@ ! CHECK-RV64: "-fc1" "-triple" "riscv64-unknown-linux-gnu" ! CHECK-RV64-SAME: "-target-cpu" "generic-rv64" "-target-feature" "+m" "-target-feature" "+a" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+c" + +! CHECK-AMDGPU: "-fc1" "-triple" "amdgcn-amd-amdhsa" +! CHECK-AMDGPU-SAME: "-target-cpu" "gfx908" + +! CHECK-AMDGPU-R600: "-fc1" "-triple" "r600-unknown-unknown" +! 
CHECK-AMDGPU-R600-SAME: "-target-cpu" "cayman" diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h --- a/llvm/include/llvm/TargetParser/TargetParser.h +++ b/llvm/include/llvm/TargetParser/TargetParser.h @@ -14,6 +14,7 @@ #ifndef LLVM_TARGETPARSER_TARGETPARSER_H #define LLVM_TARGETPARSER_TARGETPARSER_H +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" namespace llvm { @@ -149,6 +150,14 @@ IsaVersion getIsaVersion(StringRef GPU); +/// Fills Features map with default values for given target GPU +void fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, + StringMap &Features); + +/// Inserts wave size feature for given GPU into features map +bool insertWaveSizeFeature(StringRef GPU, const Triple &T, + StringMap &Features, std::string &ErrorMsg); + } // namespace AMDGPU } // namespace llvm diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -251,3 +251,218 @@ return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind); } + +void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, + StringMap &Features) { + // XXX - What does the member GPU mean if device name string passed here? 
+ if (T.isAMDGCN()) { + switch (parseArchAMDGCN(GPU)) { + case GK_GFX1103: + case GK_GFX1102: + case GK_GFX1101: + case GK_GFX1100: + Features["ci-insts"] = true; + Features["dot5-insts"] = true; + Features["dot7-insts"] = true; + Features["dot8-insts"] = true; + Features["dot9-insts"] = true; + Features["dot10-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["gfx11-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + break; + case GK_GFX1036: + case GK_GFX1035: + case GK_GFX1034: + case GK_GFX1033: + case GK_GFX1032: + case GK_GFX1031: + case GK_GFX1030: + Features["ci-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + Features["dl-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["gfx10-3-insts"] = true; + Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; + break; + case GK_GFX1012: + case GK_GFX1011: + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + [[fallthrough]]; + case GK_GFX1013: + case GK_GFX1010: + Features["dl-insts"] = true; + Features["ci-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["gfx8-insts"] = true; + Features["gfx9-insts"] = true; + Features["gfx10-insts"] = true; + Features["s-memrealtime"] = true; + Features["s-memtime-inst"] = true; + break; + case GK_GFX940: + Features["gfx940-insts"] = true; + Features["fp8-insts"] = 
true; + Features["atomic-ds-pk-add-16-insts"] = true; + Features["atomic-flat-pk-add-16-insts"] = true; + Features["atomic-global-pk-add-bf16-inst"] = true; + [[fallthrough]]; + case GK_GFX90A: + Features["gfx90a-insts"] = true; + Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; + [[fallthrough]]; + case GK_GFX908: + Features["dot3-insts"] = true; + Features["dot4-insts"] = true; + Features["dot5-insts"] = true; + Features["dot6-insts"] = true; + Features["mai-insts"] = true; + [[fallthrough]]; + case GK_GFX906: + Features["dl-insts"] = true; + Features["dot1-insts"] = true; + Features["dot2-insts"] = true; + Features["dot7-insts"] = true; + Features["dot10-insts"] = true; + [[fallthrough]]; + case GK_GFX90C: + case GK_GFX909: + case GK_GFX904: + case GK_GFX902: + case GK_GFX900: + Features["gfx9-insts"] = true; + [[fallthrough]]; + case GK_GFX810: + case GK_GFX805: + case GK_GFX803: + case GK_GFX802: + case GK_GFX801: + Features["gfx8-insts"] = true; + Features["16-bit-insts"] = true; + Features["dpp"] = true; + Features["s-memrealtime"] = true; + [[fallthrough]]; + case GK_GFX705: + case GK_GFX704: + case GK_GFX703: + case GK_GFX702: + case GK_GFX701: + case GK_GFX700: + Features["ci-insts"] = true; + [[fallthrough]]; + case GK_GFX602: + case GK_GFX601: + case GK_GFX600: + Features["s-memtime-inst"] = true; + break; + case GK_NONE: + break; + default: + llvm_unreachable("Unhandled GPU!"); + } + } else { + if (GPU.empty()) + GPU = "r600"; + + switch (llvm::AMDGPU::parseArchR600(GPU)) { + case GK_CAYMAN: + case GK_CYPRESS: + case GK_RV770: + case GK_RV670: + // TODO: Add fp64 when implemented. 
+ break; + case GK_TURKS: + case GK_CAICOS: + case GK_BARTS: + case GK_SUMO: + case GK_REDWOOD: + case GK_JUNIPER: + case GK_CEDAR: + case GK_RV730: + case GK_RV710: + case GK_RS880: + case GK_R630: + case GK_R600: + break; + default: + llvm_unreachable("Unhandled GPU!"); + } + } +} + +static bool isWave32Capable(StringRef GPU, const Triple &T) { + bool IsWave32Capable = false; + // XXX - What does the member GPU mean if device name string passed here? + if (T.isAMDGCN()) { + switch (parseArchAMDGCN(GPU)) { + case GK_GFX1103: + case GK_GFX1102: + case GK_GFX1101: + case GK_GFX1100: + case GK_GFX1036: + case GK_GFX1035: + case GK_GFX1034: + case GK_GFX1033: + case GK_GFX1032: + case GK_GFX1031: + case GK_GFX1030: + case GK_GFX1012: + case GK_GFX1011: + case GK_GFX1013: + case GK_GFX1010: + IsWave32Capable = true; + break; + default: + break; + } + } + return IsWave32Capable; +} + +bool AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T, + StringMap &Features, + std::string &ErrorMsg) { + bool IsWave32Capable = isWave32Capable(GPU, T); + const bool IsNullGPU = GPU.empty(); + // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets. + const bool HaveWave32 = + (IsWave32Capable || IsNullGPU) && Features.count("wavefrontsize32"); + const bool HaveWave64 = Features.count("wavefrontsize64"); + if (HaveWave32 && HaveWave64) { + ErrorMsg = "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive"; + return false; + } + // Don't assume any wavesize with an unknown subtarget. + if (!IsNullGPU) { + // Default to wave32 if available, or wave64 if not + if (!HaveWave32 && !HaveWave64) { + StringRef DefaultWaveSizeFeature = + IsWave32Capable ? 
"wavefrontsize32" : "wavefrontsize64"; + Features.insert(std::make_pair(DefaultWaveSizeFeature, true)); + } + } + return true; +}