This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
clang/
-
lib/Driver/ToolChains/
-
Driver/
-
ToolChains/
-
Cuda.cpp
-
test/Driver/
-
Driver/
-
openmp-system-arch.c

Differential D141708

[OpenMP] Make `-fopenmp-target=` use the `nvptx-arch` tool
ClosedPublic

Authored by jhuber6 on Jan 13 2023, 10:38 AM.

Download Raw Diff

Details

Reviewers

tianshilei1992
jdoerfert
tra

Commits

rG52b9a3974244: [OpenMP] Make `-fopenmp-target=` use the `nvptx-arch` tool

Summary

Previously, if the user did not provide an architecture when using
-fopenmp-targets=nvptx64 we used the value from
CLANG_OPENMP_NVPTX_DEFAULT_ARCH, which is defined at compile time. This
isn't ideal because it means that the default is set when the LLVM
compiler is built. Instead, this patch uses the nvptx-arch tool to
query it at runtime. This matches the existing behaviour of the AMDGPU
toolchain with its amdgpu-arch tool.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

jhuber6 created this revision.Jan 13 2023, 10:38 AM

Herald added a project: Restricted Project. · View Herald TranscriptJan 13 2023, 10:38 AM

Herald added subscribers: kosarev, mattd, gchakrabarti and 4 others. · View Herald Transcript

jhuber6 requested review of this revision.Jan 13 2023, 10:38 AM

Herald added a project: Restricted Project. · View Herald TranscriptJan 13 2023, 10:38 AM

Herald added subscribers: cfe-commits, sstefan1, MaskRay. · View Herald Transcript

Harbormaster completed remote builds in B207682: Diff 489062.Jan 13 2023, 11:56 AM

jhuber6 added a child revision: D141723: [Clang] Remove `CLANG_OPENMP_NVPTX_DEFAULT_ARCH` CMake option..Jan 13 2023, 1:04 PM

This revision is now accepted and ready to land.Jan 13 2023, 2:43 PM

Closed by commit rG52b9a3974244: [OpenMP] Make `-fopenmp-target=` use the `nvptx-arch` tool (authored by jhuber6). · Explain WhyJan 13 2023, 2:52 PM

This revision was automatically updated to reflect the committed changes.

jhuber6 added a commit: rG52b9a3974244: [OpenMP] Make `-fopenmp-target=` use the `nvptx-arch` tool.

Revision Contents

Path

Size

clang/

lib/

Driver/

ToolChains/

Cuda.cpp

22 lines

test/

Driver/

openmp-system-arch.c

18 lines

Diff 489130

clang/lib/Driver/ToolChains/Cuda.cpp

Show All 13 Lines
#include "clang/Driver/Distro.h"		#include "clang/Driver/Distro.h"
#include "clang/Driver/Driver.h"		#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"		#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"		#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"		#include "clang/Driver/Options.h"
#include "llvm/ADT/StringExtras.h"		#include "llvm/ADT/StringExtras.h"
#include "llvm/Option/ArgList.h"		#include "llvm/Option/ArgList.h"
#include "llvm/Support/FileSystem.h"		#include "llvm/Support/FileSystem.h"
		#include "llvm/Support/FormatAdapters.h"
		#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Host.h"		#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"		#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"		#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"		#include "llvm/Support/Program.h"
#include "llvm/Support/TargetParser.h"		#include "llvm/Support/TargetParser.h"
#include "llvm/Support/VirtualFileSystem.h"		#include "llvm/Support/VirtualFileSystem.h"
#include <system_error>		#include <system_error>

▲ Show 20 Lines • Show All 718 Lines • ▼ Show 20 Lines	CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
// For OpenMP device offloading, append derived arguments. Make sure		// For OpenMP device offloading, append derived arguments. Make sure
// flags are not duplicated.		// flags are not duplicated.
// Also append the compute capability.		// Also append the compute capability.
if (DeviceOffloadKind == Action::OFK_OpenMP) {		if (DeviceOffloadKind == Action::OFK_OpenMP) {
for (Arg *A : Args)		for (Arg *A : Args)
if (!llvm::is_contained(*DAL, A))		if (!llvm::is_contained(*DAL, A))
DAL->append(A);		DAL->append(A);

if (!DAL->hasArg(options::OPT_march_EQ))		if (!DAL->hasArg(options::OPT_march_EQ)) {
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),		StringRef Arch = BoundArch;
!BoundArch.empty() ? BoundArch		if (Arch.empty()) {
: CLANG_OPENMP_NVPTX_DEFAULT_ARCH);		auto ArchsOrErr = getSystemGPUArchs(Args);
		if (!ArchsOrErr) {
		std::string ErrMsg =
		llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError()));
		getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
		<< llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march";
		Arch = CudaArchToString(CudaArch::CudaDefault);
		} else {
		Arch = Args.MakeArgString(ArchsOrErr->front());
		}
		}
		DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch);
		}

return DAL;		return DAL;
}		}

for (Arg *A : Args) {		for (Arg *A : Args) {
DAL->append(A);		DAL->append(A);
}		}

▲ Show 20 Lines • Show All 86 Lines • Show Last 20 Lines

clang/test/Driver/openmp-system-arch.c

	Show All 31 Lines
	// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \			// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
	// RUN: \| FileCheck %s --check-prefix=NO-OUTPUT-ERROR			// RUN: \| FileCheck %s --check-prefix=NO-OUTPUT-ERROR
	// NO-OUTPUT-ERROR: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead.			// NO-OUTPUT-ERROR: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead.

	// case when amdgpu-arch succeeds.			// case when amdgpu-arch succeeds.
	// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \			// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
	// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \			// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
	// RUN: \| FileCheck %s --check-prefix=ARCH-GFX906			// RUN: \| FileCheck %s --check-prefix=ARCH-GFX906
				// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa \
				// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
				// RUN: \| FileCheck %s --check-prefix=ARCH-GFX906
	// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"			// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"

	// case when nvptx-arch succeeds.			// case when nvptx-arch succeeds.
	// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \			// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
	// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \			// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
	// RUN: \| FileCheck %s --check-prefix=ARCH-SM_70			// RUN: \| FileCheck %s --check-prefix=ARCH-SM_70
				// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
				// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
				// RUN: \| FileCheck %s --check-prefix=ARCH-SM_70
	// ARCH-SM_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"			// ARCH-SM_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"

	// case when both nvptx-arch and amdgpu-arch succeed.			// case when both nvptx-arch and amdgpu-arch succeed.
	// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \			// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
	// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \			// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
	// RUN: \| FileCheck %s --check-prefix=ARCH-SM_70-GFX906			// RUN: \| FileCheck %s --check-prefix=ARCH-SM_70-GFX906
	// ARCH-SM_70-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"			// ARCH-SM_70-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
	// ARCH-SM_70-GFX906: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"			// ARCH-SM_70-GFX906: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"

	// case when both nvptx-arch and amdgpu-arch succeed with other archs.			// case when both nvptx-arch and amdgpu-arch succeed with other archs.
	// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native,sm_75,gfx1030 \			// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native,sm_75,gfx1030 \
	// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \			// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
	// RUN: \| FileCheck %s --check-prefix=ARCH-MULTIPLE			// RUN: \| FileCheck %s --check-prefix=ARCH-MULTIPLE
	// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1030"			// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1030"
	// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"			// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
	// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"			// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
	// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_75"			// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_75"

				// case when 'nvptx-arch' returns nothing using `-fopenmp-targets=`.
				// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp \
				// RUN: -fopenmp-targets=nvptx64-nvidia-cuda --nvptx-arch-tool=%t/nvptx_arch_empty %s 2>&1 \
				// RUN: \| FileCheck %s --check-prefix=NVPTX
				// NVPTX: error: cannot determine nvptx64 architecture: No NVIDIA GPU detected in the system; consider passing it via '-march'

				// case when 'amdgpu-arch' returns nothing using `-fopenmp-targets=`.
				// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp \
				// RUN: -fopenmp-targets=amdgcn-amd-amdhsa --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
				// RUN: \| FileCheck %s --check-prefix=AMDGPU
				// AMDGPU: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '-march'