Diff 537127

openmp/libomptarget/DeviceRTL/include/Configuration.h

	Show All 31 Lines
	uint32_t getDeviceNum();			uint32_t getDeviceNum();

	/// Return the user chosen debug level.			/// Return the user chosen debug level.
	uint32_t getDebugKind();			uint32_t getDebugKind();

	/// Return the amount of dynamic shared memory that was allocated at launch.			/// Return the amount of dynamic shared memory that was allocated at launch.
	uint64_t getDynamicMemorySize();			uint64_t getDynamicMemorySize();

				/// Returns the cycles per second of the device's fixed frequency clock.
				uint64_t getClockFrequency();

	/// Return if debugging is enabled for the given debug kind.			/// Return if debugging is enabled for the given debug kind.
	bool isDebugMode(DebugKind Level);			bool isDebugMode(DebugKind Level);

	/// Indicates if this kernel may require thread-specific states, or if it was			/// Indicates if this kernel may require thread-specific states, or if it was
	/// explicitly disabled by the user.			/// explicitly disabled by the user.
	bool mayUseThreadStates();			bool mayUseThreadStates();

	/// Indicates if this kernel may require data environments for nested			/// Indicates if this kernel may require data environments for nested
	/// parallelism, or if it was explicitly disabled by the user.			/// parallelism, or if it was explicitly disabled by the user.
	bool mayUseNestedParallelism();			bool mayUseNestedParallelism();

	} // namespace config			} // namespace config
	} // namespace ompx			} // namespace ompx

	#endif			#endif

openmp/libomptarget/DeviceRTL/src/Configuration.cpp

	Show All 40 Lines
	uint32_t config::getDeviceNum() {			uint32_t config::getDeviceNum() {
	return __omp_rtl_device_environment.DeviceNum;			return __omp_rtl_device_environment.DeviceNum;
	}			}

	uint64_t config::getDynamicMemorySize() {			uint64_t config::getDynamicMemorySize() {
	return __omp_rtl_device_environment.DynamicMemSize;			return __omp_rtl_device_environment.DynamicMemSize;
	}			}

				uint64_t config::getClockFrequency() {
				return __omp_rtl_device_environment.ClockFrequency;
				}

	bool config::isDebugMode(config::DebugKind Kind) {			bool config::isDebugMode(config::DebugKind Kind) {
	return config::getDebugKind() & Kind;			return config::getDebugKind() & Kind;
	}			}

	bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; }			bool config::mayUseThreadStates() { return !__omp_rtl_assume_no_thread_state; }

	bool config::mayUseNestedParallelism() {			bool config::mayUseNestedParallelism() {
	return !__omp_rtl_assume_no_nested_parallelism;			return !__omp_rtl_assume_no_nested_parallelism;
	}			}

	#pragma omp end declare target			#pragma omp end declare target

openmp/libomptarget/DeviceRTL/src/Misc.cpp

	//===--------- Misc.cpp - OpenMP device misc interfaces ----------- C++ -*-===//			//===--------- Misc.cpp - OpenMP device misc interfaces ----------- C++ -*-===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

				#include "Configuration.h"
	#include "Types.h"			#include "Types.h"

	#include "Debug.h"			#include "Debug.h"

	#pragma omp begin declare target device_type(nohost)			#pragma omp begin declare target device_type(nohost)

	namespace ompx {			namespace ompx {
	namespace impl {			namespace impl {

	double getWTick();			double getWTick();

	double getWTime();			double getWTime();

	/// AMDGCN Implementation			/// AMDGCN Implementation
	///			///
	///{			///{
	#pragma omp begin declare variant match(device = {arch(amdgcn)})			#pragma omp begin declare variant match(device = {arch(amdgcn)})

	double getWTick() { return ((double)1E-9); }			double getWTick() {
				// The number of ticks per second for the AMDGPU clock varies by card and can
				// only be retrieved by querying the driver. We rely on the device environment
				// to inform us what the proper frequency is.
				return 1.0 / config::getClockFrequency();
				}

	double getWTime() {			double getWTime() {
	// The intrinsics for measuring time have undocumented frequency			uint64_t NumTicks = 0;
	// This will probably need to be found by measurement on a number of			if constexpr (__has_builtin(__builtin_amdgcn_s_sendmsg_rtnl))
	// architectures. Until then, return 0, which is very inaccurate as a			NumTicks = __builtin_amdgcn_s_sendmsg_rtnl(0x83);
	// timer but resolves the undefined symbol at link time.			else if constexpr (__has_builtin(__builtin_amdgcn_s_memrealtime))
	return 0;			NumTicks = __builtin_amdgcn_s_memrealtime();
				else if constexpr (__has_builtin(__builtin_amdgcn_s_memtime))
				NumTicks = __builtin_amdgcn_s_memtime();

				return static_cast<double>(NumTicks) * getWTick();
	}			}

	#pragma omp end declare variant			#pragma omp end declare variant

	/// NVPTX Implementation			/// NVPTX Implementation
	///			///
	///{			///{
	#pragma omp begin declare variant match( \			#pragma omp begin declare variant match( \
	▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines

openmp/libomptarget/include/DeviceEnvironment.h

	Show All 14 Lines

	// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions			// deviceRTL uses <stdint> and DeviceRTL uses explicit definitions

	struct DeviceEnvironmentTy {			struct DeviceEnvironmentTy {
	uint32_t DebugKind;			uint32_t DebugKind;
	uint32_t NumDevices;			uint32_t NumDevices;
	uint32_t DeviceNum;			uint32_t DeviceNum;
	uint32_t DynamicMemSize;			uint32_t DynamicMemSize;
				uint64_t ClockFrequency;
	};			};

	#endif			#endif

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Show First 20 Lines • Show All 1,590 Lines • ▼ Show 20 Lines	Error initImpl(GenericPluginTy &Plugin) override {
ComputeUnitKind = GPUName;		ComputeUnitKind = GPUName;

// Get the wavefront size.		// Get the wavefront size.
uint32_t WavefrontSize = 0;		uint32_t WavefrontSize = 0;
if (auto Err = getDeviceAttr(HSA_AGENT_INFO_WAVEFRONT_SIZE, WavefrontSize))		if (auto Err = getDeviceAttr(HSA_AGENT_INFO_WAVEFRONT_SIZE, WavefrontSize))
return Err;		return Err;
GridValues.GV_Warp_Size = WavefrontSize;		GridValues.GV_Warp_Size = WavefrontSize;

		// Get the frequency of the steady clock.
		if (auto Err = getDeviceAttr(HSA_AMD_AGENT_INFO_TIMESTAMP_FREQUENCY,
		ClockFrequency))
		return Err;

// Load the grid values depending on the wavefront.		// Load the grid values depending on the wavefront.
if (WavefrontSize == 32)		if (WavefrontSize == 32)
GridValues = getAMDGPUGridValues<32>();		GridValues = getAMDGPUGridValues<32>();
else if (WavefrontSize == 64)		else if (WavefrontSize == 64)
GridValues = getAMDGPUGridValues<64>();		GridValues = getAMDGPUGridValues<64>();
else		else
return Plugin::error("Unexpected AMDGPU wavefront %d", WavefrontSize);		return Plugin::error("Unexpected AMDGPU wavefront %d", WavefrontSize);

▲ Show 20 Lines • Show All 145 Lines • ▼ Show 20 Lines	doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const override {

return std::move(		return std::move(
MemoryBuffer::getFileOrSTDIN(LinkerOutputFilePath.data()).get());		MemoryBuffer::getFileOrSTDIN(LinkerOutputFilePath.data()).get());
}		}

/// See GenericDeviceTy::getComputeUnitKind().		/// See GenericDeviceTy::getComputeUnitKind().
std::string getComputeUnitKind() const override { return ComputeUnitKind; }		std::string getComputeUnitKind() const override { return ComputeUnitKind; }

		/// Returns the clock frequency for the given AMDGPU device.
		uint64_t getClockFrequency() const override { return ClockFrequency; }

/// Allocate and construct an AMDGPU kernel.		/// Allocate and construct an AMDGPU kernel.
Expected<GenericKernelTy *>		Expected<GenericKernelTy *>
constructKernelEntry(const __tgt_offload_entry &KernelEntry,		constructKernelEntry(const __tgt_offload_entry &KernelEntry,
DeviceImageTy &Image) override {		DeviceImageTy &Image) override {

Expected<OMPTgtExecModeFlags> ExecModeOrErr =		Expected<OMPTgtExecModeFlags> ExecModeOrErr =
getExecutionModeForKernel(KernelEntry.name, Image);		getExecutionModeForKernel(KernelEntry.name, Image);
if (!ExecModeOrErr)		if (!ExecModeOrErr)
▲ Show 20 Lines • Show All 644 Lines • ▼ Show 20 Lines	private:
AMDGPUSignalManagerTy AMDGPUSignalManager;		AMDGPUSignalManagerTy AMDGPUSignalManager;

/// The agent handler corresponding to the device.		/// The agent handler corresponding to the device.
hsa_agent_t Agent;		hsa_agent_t Agent;

/// The GPU architecture.		/// The GPU architecture.
std::string ComputeUnitKind;		std::string ComputeUnitKind;

		/// The frequency of the steady clock inside the device.
		uint64_t ClockFrequency;

/// Reference to the host device.		/// Reference to the host device.
AMDHostDeviceTy &HostDevice;		AMDHostDeviceTy &HostDevice;

/// List of device packet queues.		/// List of device packet queues.
std::vector<AMDGPUQueueTy> Queues;		std::vector<AMDGPUQueueTy> Queues;
};		};

Error AMDGPUDeviceImageTy::loadExecutable(const AMDGPUDeviceTy &Device) {		Error AMDGPUDeviceImageTy::loadExecutable(const AMDGPUDeviceTy &Device) {
▲ Show 20 Lines • Show All 551 Lines • Show Last 20 Lines

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

Show First 20 Lines • Show All 726 Lines • ▼ Show 20 Lines	struct GenericDeviceTy : public DeviceAllocatorTy {
uint32_t getBlockLimit() const { return GridValues.GV_Max_Teams; }		uint32_t getBlockLimit() const { return GridValues.GV_Max_Teams; }
uint32_t getDefaultNumThreads() const {		uint32_t getDefaultNumThreads() const {
return GridValues.GV_Default_WG_Size;		return GridValues.GV_Default_WG_Size;
}		}
uint32_t getDefaultNumBlocks() const {		uint32_t getDefaultNumBlocks() const {
return GridValues.GV_Default_Num_Teams;		return GridValues.GV_Default_Num_Teams;
}		}
uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }		uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
		virtual uint64_t getClockFrequency() const { return CLOCKS_PER_SEC; }

/// Get target compute unit kind (e.g., sm_80, or gfx908).		/// Get target compute unit kind (e.g., sm_80, or gfx908).
virtual std::string getComputeUnitKind() const { return "unknown"; }		virtual std::string getComputeUnitKind() const { return "unknown"; }

/// Post processing after jit backend. The ownership of \p MB will be taken.		/// Post processing after jit backend. The ownership of \p MB will be taken.
virtual Expected<std::unique_ptr<MemoryBuffer>>		virtual Expected<std::unique_ptr<MemoryBuffer>>
doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {		doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
return std::move(MB);		return std::move(MB);
▲ Show 20 Lines • Show All 474 Lines • Show Last 20 Lines

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

Show First 20 Lines • Show All 503 Lines • ▼ Show 20 Lines	if (!shouldSetupDeviceEnvironment())
return Plugin::success();		return Plugin::success();

DeviceEnvironmentTy DeviceEnvironment;		DeviceEnvironmentTy DeviceEnvironment;
DeviceEnvironment.DebugKind = OMPX_DebugKind;		DeviceEnvironment.DebugKind = OMPX_DebugKind;
DeviceEnvironment.NumDevices = Plugin.getNumDevices();		DeviceEnvironment.NumDevices = Plugin.getNumDevices();
// TODO: The device ID used here is not the real device ID used by OpenMP.		// TODO: The device ID used here is not the real device ID used by OpenMP.
DeviceEnvironment.DeviceNum = DeviceId;		DeviceEnvironment.DeviceNum = DeviceId;
DeviceEnvironment.DynamicMemSize = OMPX_SharedMemorySize;		DeviceEnvironment.DynamicMemSize = OMPX_SharedMemorySize;
		DeviceEnvironment.ClockFrequency = getClockFrequency();

// Create the metainfo of the device environment global.		// Create the metainfo of the device environment global.
GlobalTy DevEnvGlobal("__omp_rtl_device_environment",		GlobalTy DevEnvGlobal("__omp_rtl_device_environment",
sizeof(DeviceEnvironmentTy), &DeviceEnvironment);		sizeof(DeviceEnvironmentTy), &DeviceEnvironment);

// Write device environment values to the device.		// Write device environment values to the device.
GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();		GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
if (auto Err = GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal)) {		if (auto Err = GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal)) {
▲ Show 20 Lines • Show All 1,037 Lines • Show Last 20 Lines

openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp

Show First 20 Lines • Show All 823 Lines • ▼ Show 20 Lines	CUresult getDeviceAttrRaw(uint32_t Kind, int &Value) {
return cuDeviceGetAttribute(&Value, (CUdevice_attribute)Kind, Device);		return cuDeviceGetAttribute(&Value, (CUdevice_attribute)Kind, Device);
}		}

/// See GenericDeviceTy::getComputeUnitKind().		/// See GenericDeviceTy::getComputeUnitKind().
std::string getComputeUnitKind() const override {		std::string getComputeUnitKind() const override {
return ComputeCapability.str();		return ComputeCapability.str();
}		}

		/// Returns the clock frequency for the given NVPTX device.
		uint64_t getClockFrequency() const override { return 1000000000; }
		tianshilei1992Unsubmitted Not Done Reply Inline Actions This doesn't need to be `1000000000UL`? tianshilei1992: This doesn't need to be `1000000000UL`?
		jhuber6AuthorUnsubmitted Done Reply Inline Actions A 32-bit integer fits at least two billion, so we're just under here. jhuber6: A 32-bit integer fits at least two billion, so we're just under here.

private:		private:
using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;		using CUDAStreamManagerTy = GenericDeviceResourceManagerTy<CUDAStreamRef>;
using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;		using CUDAEventManagerTy = GenericDeviceResourceManagerTy<CUDAEventRef>;

/// Stream manager for CUDA streams.		/// Stream manager for CUDA streams.
CUDAStreamManagerTy CUDAStreamManager;		CUDAStreamManagerTy CUDAStreamManager;

/// Event manager for CUDA events.		/// Event manager for CUDA events.
▲ Show 20 Lines • Show All 252 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[Libomptarget] Correctly implement `getWTime` on AMDGPU
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 537127

openmp/libomptarget/DeviceRTL/include/Configuration.h

openmp/libomptarget/DeviceRTL/src/Configuration.cpp

openmp/libomptarget/DeviceRTL/src/Misc.cpp

openmp/libomptarget/include/DeviceEnvironment.h

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[Libomptarget] Correctly implement `getWTime` on AMDGPU
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 537127

openmp/libomptarget/DeviceRTL/include/Configuration.h

openmp/libomptarget/DeviceRTL/src/Configuration.cpp

openmp/libomptarget/DeviceRTL/src/Misc.cpp

openmp/libomptarget/include/DeviceEnvironment.h

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp

[Libomptarget] Correctly implement `getWTime` on AMDGPU
ClosedPublic