Diff 277515

clang/test/OpenMP/remarks_parallel_in_target_state_machine.c

This file was added.

				// RUN: %clang_cc1 -verify=host -Rpass=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
				// RUN: %clang_cc1 -verify -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out
				// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify -Rpass=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out

				// host-no-diagnostics

				#ifndef HEADER
				#define HEADER

				void bar(void) {
				#pragma omp parallel // #1
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - #pragma omp parallel // #1 - // expected-remark@#1 {{Found parallel region that is called through a state machine__omp_outlined__2_wrapper in non-SPMD target region. This can lead to excessive register usage in unrelated kernels in the same translation unit due to spurious call edges assumed by ptxas.}} - // expected-remark@#1 {{Parallel region __omp_outlined__2_wrapper is not known to be called from a single target region only, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}} - { - } +#pragma omp parallel // #1 + // expected-remark@#1 {{Found parallel region that is called through a state machine__omp_outlined__2_wrapper in non-SPMD target region. This can lead to excessive register usage in unrelated kernels in the same translation unit due to spurious call edges assumed by ptxas.}} + // expected-remark@#1 {{Parallel region __omp_outlined__2_wrapper is not known to be called from a single target region only, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}} + { + } Lint: Pre-merge checks: clang-format: please reformat the code ``` - #pragma omp parallel // #1 - // expected…
				// expected-remark@#1 {{Found parallel region that is called through a state machine__omp_outlined__2_wrapper in non-SPMD target region. This can lead to excessive register usage in unrelated kernels in the same translation unit due to spurious call edges assumed by ptxas.}}
				ye-luo | Unsubmitted | Not Done | Reply | Inline Actions | Add a space: change "machine__omp_outlined__2_wrapper" to "machine __omp_outlined__2_wrapper". ye-luo: Add a space: change "machine__omp_outlined__2_wrapper" to "machine __omp_outlined__2_wrapper".
				// expected-remark@#1 {{Parallel region __omp_outlined__2_wrapper is not known to be called from a single target region only, maybe the surrounding function has external linkage?; will not attempt to rewrite the state machine use.}}
				{
				}
				}

				void foo(void) {
				#pragma omp target teams
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - #pragma omp target teams +#pragma omp target teams Lint: Pre-merge checks: clang-format: please reformat the code ``` - #pragma omp target teams +#pragma omp target…
				{
				#pragma omp parallel // #2
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - #pragma omp parallel // #2 +#pragma omp parallel // #2 Lint: Pre-merge checks: clang-format: please reformat the code ``` - #pragma omp parallel // #2 +#pragma omp…
				// expected-remark@#2 {{Found parallel region that is called through a state machine__omp_outlined__1_wrapper in non-SPMD target region. This can lead to excessive register usage in unrelated kernels in the same translation unit due to spurious call edges assumed by ptxas.}}
				// expected-remark@#2 {{Replace state machine uses of parallel region __omp_outlined__1_wrapper called from kernel __omp_offloading_35_}}
				{
				}
				bar();
				#pragma omp parallel // #3
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - #pragma omp parallel // #3 +#pragma omp parallel // #3 Lint: Pre-merge checks: clang-format: please reformat the code ``` - #pragma omp parallel // #3 +#pragma omp…
				// expected-remark@#3 {{Found parallel region that is called through a state machine__omp_outlined__3_wrapper in non-SPMD target region. This can lead to excessive register usage in unrelated kernels in the same translation unit due to spurious call edges assumed by ptxas.}}
				// expected-remark@#3 {{Replace state machine uses of parallel region __omp_outlined__3_wrapper called from kernel __omp_offloading_35_}}
				{
				}
				}
				}

				#endif

				// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to}}
				// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Show First 20 Lines • Show All 1,027 Lines • ▼ Show 20 Lines	bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
if (!KernelPrepareParallelRFI)		if (!KernelPrepareParallelRFI)
return Changed;		return Changed;

for (Function *F : SCC) {		for (Function *F : SCC) {

// Check if the function is used in a __kmpc_kernel_prepare_parallel call at		// Check if the function is used in a __kmpc_kernel_prepare_parallel call at
// all.		// all.
bool UnknownUse = false;		bool UnknownUse = false;
		bool KernelPrepareUse = false;
unsigned NumDirectCalls = 0;		unsigned NumDirectCalls = 0;

SmallVector<Use *, 2> ToBeReplacedStateMachineUses;		SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
foreachUse(*F, [&](Use &U) {		foreachUse(*F, [&](Use &U) {
if (auto *CB = dyn_cast<CallBase>(U.getUser()))		if (auto *CB = dyn_cast<CallBase>(U.getUser()))
if (CB->isCallee(&U)) {		if (CB->isCallee(&U)) {
++NumDirectCalls;		++NumDirectCalls;
return;		return;
}		}

if (isa<ICmpInst>(U.getUser())) {		if (isa<ICmpInst>(U.getUser())) {
ToBeReplacedStateMachineUses.push_back(&U);		ToBeReplacedStateMachineUses.push_back(&U);
return;		return;
}		}
if (OpenMPOpt::getCallIfRegularCall(*U.getUser(),		if (OpenMPOpt::getCallIfRegularCall(*U.getUser(),
&KernelPrepareParallelRFI)) {		&KernelPrepareParallelRFI)) {
		KernelPrepareUse = true;
ToBeReplacedStateMachineUses.push_back(&U);		ToBeReplacedStateMachineUses.push_back(&U);
return;		return;
}		}
UnknownUse = true;		UnknownUse = true;
});		});

// If this ever hits, we should investigate.		// Do not emit a remark if we haven't seen a __kmpc_kernel_prepare_parallel
if (UnknownUse \|\| NumDirectCalls != 1)		// use.
		if (!KernelPrepareUse)
continue;		continue;

// TODO: This is not a necessary restriction and should be lifted.		{
if (ToBeReplacedStateMachineUses.size() != 2)		auto Remark = [&](OptimizationRemark OR) {
		return OR
		<< "Found parallel region that is called through a state machine"
		<< ore::NV("OpenMPParallelRegion", F->getName())
		<< " in non-SPMD target region. This can lead to excessive "
		"register usage in unrelated kernels in the same translation "
		"unit due to spurious call edges assumed by ptxas.";
		};
		emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
		}

		// If this ever hits, we should investigate.
		// TODO: Checking the number of uses is not a necessary restriction and
		// should be lifted.
		if (UnknownUse \|\| NumDirectCalls != 1 \|\|
		ToBeReplacedStateMachineUses.size() != 2) {
		{
		auto Remark = [&](OptimizationRemark OR) {
		return OR << "Parallel region "
		<< ore::NV("OpenMPParallelRegion", F->getName()) << " has "
		<< (UnknownUse ? "unknown" : "unexpected")
		<< " uses; will not attempt to rewrite the state machine.";
		};
		emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
		}
continue;		continue;
		}

// Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give		// Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give
// up if the function is not called from a unique kernel.		// up if the function is not called from a unique kernel.
Kernel K = getUniqueKernelFor(*F);		Kernel K = getUniqueKernelFor(*F);
if (!K)		if (!K) {
		{
		auto Remark = [&](OptimizationRemark OR) {
		return OR << "Parallel region "
		<< ore::NV("OpenMPParallelRegion", F->getName())
		<< " is not known to be called from a single target region "
		"only, maybe the surrounding function has external "
		"linkage?; "
		"will not attempt to rewrite the state machine use.";
		};
		emitRemarkOnFunction(F, "OpenMPParallelRegionInMultipleKernesl",
		Remark);
		}
continue;		continue;
		}

// We now know F is a parallel body function called only from the kernel K.		// We now know F is a parallel body function called only from the kernel K.
// We also identified the state machine uses in which we replace the		// We also identified the state machine uses in which we replace the
// function pointer by a new global symbol for identification purposes. This		// function pointer by a new global symbol for identification purposes. This
// ensures only direct calls to the function are left.		// ensures only direct calls to the function are left.

		{
		auto Remark = [&](OptimizationRemark OR) {
		return OR << "Replace state machine uses of parallel region "
		<< ore::NV("OpenMPParallelRegion", F->getName())
		<< " called from kernel "
		<< ore::NV("OpenMPParallelRegionKernel", K->getName()) << ".";
		};
		emitRemarkOnFunction(F, "OpenMPParallelRegionInNonSPMD", Remark);
		}

Module &M = *F->getParent();		Module &M = *F->getParent();
Type *Int8Ty = Type::getInt8Ty(M.getContext());		Type *Int8Ty = Type::getInt8Ty(M.getContext());

auto *ID = new GlobalVariable(		auto *ID = new GlobalVariable(
M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,		M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
UndefValue::get(Int8Ty), F->getName() + ".ID");		UndefValue::get(Int8Ty), F->getName() + ".ID");

for (Use *U : ToBeReplacedStateMachineUses)		for (Use *U : ToBeReplacedStateMachineUses)
▲ Show 20 Lines • Show All 347 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[OpenMP][NFC] Emit remarks during GPU state machine optimization
Closed, Public

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 277515

clang/test/OpenMP/remarks_parallel_in_target_state_machine.c

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[OpenMP][NFC] Emit remarks during GPU state machine optimization (Closed, Public)

Details

Diff Detail

Unit Tests: Failed

Event Timeline

Revision Contents

Diff 277515

clang/test/OpenMP/remarks_parallel_in_target_state_machine.c

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

[OpenMP][NFC] Emit remarks during GPU state machine optimization
Closed, Public