This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contentst

-
clang/
-
lib/CodeGen/
-
CodeGen/
-
CGOpenMPRuntimeGPU.cpp
-
test/OpenMP/
-
OpenMP/
-
requires_codegen.cpp

Differential D101595

[Clang][OpenMP] Allow unified_shared_memory for Pascal-generation GPUs.
ClosedPublic

Authored by Meinersbur on Apr 29 2021, 9:32 PM.

Download Raw Diff

Details

Reviewers

ye-luo
JonChesterfield
ABataev
patricklyster
kkwli0
jdoerfert

Commits

rG83ff0ff46337: [Clang][OpenMP] Allow unified_shared_memory for Pascal-generation GPUs.

Summary

The Pascal architecture supports the page migration engine required for unified_shared_memory, as indicated by NVIDIA:

The limitation was introduced in D54493 which justified the cut-off by the requirement for unified addressing. However, Unified Virtual Addressing (UVA) is already available with sm20 (Fermi. Kepler, Maxwell). Unified shared memory might even be possible with these, but with migration of entire allocations on kernel startup.

To be sure, I enabled the tests for a Pascal GPU which finish successfully.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

Meinersbur created this revision.Apr 29 2021, 9:32 PM

Herald added subscribers: guansong, yaxunl. · View Herald TranscriptApr 29 2021, 9:32 PM

Meinersbur requested review of this revision.Apr 29 2021, 9:32 PM

Herald added a reviewer: jdoerfert. · View Herald TranscriptApr 29 2021, 9:32 PM

Herald added a subscriber: sstefan1. · View Herald Transcript

Harbormaster completed remote builds in B101826: Diff 341762.Apr 29 2021, 10:36 PM

Nice. Thanks for including the references.

This revision is now accepted and ready to land.Apr 30 2021, 12:43 AM

Meinersbur mentioned this in D101498: [OpenMP] Test unified shared memory tests only on systems that support it..Apr 30 2021, 11:55 AM

Will fix the failed required_codegen.cpp test.

Fix test case

Will wait for D101498 is resolved, otherwise I cannot test this anymore.

This revision is now accepted and ready to land.May 1 2021, 7:52 PM

Harbormaster completed remote builds in B102133: Diff 342190.May 1 2021, 8:44 PM

Closed by commit rG83ff0ff46337: [Clang][OpenMP] Allow unified_shared_memory for Pascal-generation GPUs. (authored by Meinersbur). · Explain WhyMay 13 2021, 3:16 PM

This revision was automatically updated to reflect the committed changes.

Meinersbur added a commit: rG83ff0ff46337: [Clang][OpenMP] Allow unified_shared_memory for Pascal-generation GPUs..

Revision Contents

Path

Size

clang/

lib/

CodeGen/

CGOpenMPRuntimeGPU.cpp

8 lines

test/

OpenMP/

requires_codegen.cpp

6 lines

Diff 342190

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Show First 20 Lines • Show All 4,435 Lines • ▼ Show 20 Lines	if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
case CudaArch::SM_20:		case CudaArch::SM_20:
case CudaArch::SM_21:		case CudaArch::SM_21:
case CudaArch::SM_30:		case CudaArch::SM_30:
case CudaArch::SM_32:		case CudaArch::SM_32:
case CudaArch::SM_35:		case CudaArch::SM_35:
case CudaArch::SM_37:		case CudaArch::SM_37:
case CudaArch::SM_50:		case CudaArch::SM_50:
case CudaArch::SM_52:		case CudaArch::SM_52:
case CudaArch::SM_53:		case CudaArch::SM_53: {
case CudaArch::SM_60:
case CudaArch::SM_61:
case CudaArch::SM_62: {
SmallString<256> Buffer;		SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);		llvm::raw_svector_ostream Out(Buffer);
Out << "Target architecture " << CudaArchToString(Arch)		Out << "Target architecture " << CudaArchToString(Arch)
<< " does not support unified addressing";		<< " does not support unified addressing";
CGM.Error(Clause->getBeginLoc(), Out.str());		CGM.Error(Clause->getBeginLoc(), Out.str());
return;		return;
}		}
		case CudaArch::SM_60:
		case CudaArch::SM_61:
		case CudaArch::SM_62:
case CudaArch::SM_70:		case CudaArch::SM_70:
case CudaArch::SM_72:		case CudaArch::SM_72:
case CudaArch::SM_75:		case CudaArch::SM_75:
case CudaArch::SM_80:		case CudaArch::SM_80:
case CudaArch::SM_86:		case CudaArch::SM_86:
case CudaArch::GFX600:		case CudaArch::GFX600:
case CudaArch::GFX601:		case CudaArch::GFX601:
case CudaArch::GFX602:		case CudaArch::GFX602:
▲ Show 20 Lines • Show All 249 Lines • Show Last 20 Lines

clang/test/OpenMP/requires_codegen.cpp

	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DREGION_HOST			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DREGION_HOST
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_20 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_20 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_21 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_21 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_30 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_30 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_32 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_32 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_35 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_35 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_37 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_37 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_50 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_50 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_52 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_52 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_53 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_53 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_60 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_60 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_62 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_62 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_72 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_72 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
	// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_75 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR			// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_75 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR

	#if defined(REGION_HOST) \|\| defined(REGION_DEVICE_NO_ERR)			#if defined(REGION_HOST) \|\| defined(REGION_DEVICE_NO_ERR)
	// expected-no-diagnostics			// expected-no-diagnostics
	#pragma omp requires unified_shared_memory			#pragma omp requires unified_shared_memory
	#endif			#endif

	#ifdef REGION_DEVICE			#ifdef REGION_DEVICE
	#pragma omp requires unified_shared_memory // expected-error-re {{Target architecture sm_{{20\|21\|30\|32\|35\|37\|50\|52\|53\|60\|61\|62}} does not support unified addressing}}			#pragma omp requires unified_shared_memory // expected-error-re {{Target architecture sm_{{20\|21\|30\|32\|35\|37\|50\|52\|53\|60\|61\|62}} does not support unified addressing}}
	#endif			#endif