Diff 544910

clang/include/clang/Basic/CodeGenOptions.def

	Show First 20 Lines • Show All 196 Lines • ▼ Show 20 Lines
	CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names.			CODEGENOPT(UniqueInternalLinkageNames, 1, 0) ///< Internal Linkage symbols get unique names.
	CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information.			CODEGENOPT(SplitMachineFunctions, 1, 0) ///< Split machine functions using profile information.

	/// When false, this attempts to generate code as if the result of an			/// When false, this attempts to generate code as if the result of an
	/// overflowing conversion matches the overflowing behavior of a target's native			/// overflowing conversion matches the overflowing behavior of a target's native
	/// float-to-int conversion instructions.			/// float-to-int conversion instructions.
	CODEGENOPT(StrictFloatCastOverflow, 1, 1)			CODEGENOPT(StrictFloatCastOverflow, 1, 1)

	CODEGENOPT(UniformWGSize , 1, 0) ///< -cl-uniform-work-group-size
	CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.			CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
	/// Method of Objective-C dispatch to use.			/// Method of Objective-C dispatch to use.
	ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)			ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy)
	/// Replace certain message sends with calls to ObjC runtime entrypoints			/// Replace certain message sends with calls to ObjC runtime entrypoints
	CODEGENOPT(ObjCConvertMessagesToRuntimeCalls , 1, 1)			CODEGENOPT(ObjCConvertMessagesToRuntimeCalls , 1, 1)
	CODEGENOPT(ObjCAvoidHeapifyLocalBlocks, 1, 0)			CODEGENOPT(ObjCAvoidHeapifyLocalBlocks, 1, 0)

	VALUE_CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified.			VALUE_CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified.
	▲ Show 20 Lines • Show All 306 Lines • Show Last 20 Lines

clang/include/clang/Basic/LangOptions.def

	Show First 20 Lines • Show All 272 Lines • ▼ Show 20 Lines
	LANGOPT(GPUExcludeWrongSideOverloads, 1, 0, "always exclude wrong side overloads in overloading resolution for CUDA/HIP")			LANGOPT(GPUExcludeWrongSideOverloads, 1, 0, "always exclude wrong side overloads in overloading resolution for CUDA/HIP")
	LANGOPT(OffloadingNewDriver, 1, 0, "use the new driver for generating offloading code.")			LANGOPT(OffloadingNewDriver, 1, 0, "use the new driver for generating offloading code.")

	LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")			LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device")
	LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation")			LANGOPT(SYCLIsHost , 1, 0, "SYCL host compilation")
	ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")			ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL standard used")

	LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")			LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
				LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)")

	LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")			LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
	LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")			LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
	LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable")			LANGOPT(AlignedAllocationUnavailable, 1, 0, "aligned allocation functions are unavailable")
	LANGOPT(NewAlignOverride , 32, 0, "maximum alignment guaranteed by '::operator new(size_t)'")			LANGOPT(NewAlignOverride , 32, 0, "maximum alignment guaranteed by '::operator new(size_t)'")
	BENIGN_LANGOPT(ModulesCodegen , 1, 0, "Modules code generation")			BENIGN_LANGOPT(ModulesCodegen , 1, 0, "Modules code generation")
	BENIGN_LANGOPT(ModulesDebugInfo , 1, 0, "Modules debug info")			BENIGN_LANGOPT(ModulesDebugInfo , 1, 0, "Modules debug info")
	BENIGN_LANGOPT(ElideConstructors , 1, 1, "C++ copy constructor elision")			BENIGN_LANGOPT(ElideConstructors , 1, 1, "C++ copy constructor elision")
	▲ Show 20 Lines • Show All 191 Lines • Show Last 20 Lines

clang/include/clang/Driver/Options.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 906 Lines • ▼ Show 20 Lines
def arch__only : Separate<["-"], "arch_only">;		def arch__only : Separate<["-"], "arch_only">;
def autocomplete : Joined<["--"], "autocomplete=">;		def autocomplete : Joined<["--"], "autocomplete=">;
def bind__at__load : Flag<["-"], "bind_at_load">;		def bind__at__load : Flag<["-"], "bind_at_load">;
def bundle__loader : Separate<["-"], "bundle_loader">;		def bundle__loader : Separate<["-"], "bundle_loader">;
def bundle : Flag<["-"], "bundle">;		def bundle : Flag<["-"], "bundle">;
def b : JoinedOrSeparate<["-"], "b">, Flags<[LinkerInput]>,		def b : JoinedOrSeparate<["-"], "b">, Flags<[LinkerInput]>,
HelpText<"Pass -b <arg> to the linker on AIX">, MetaVarName<"<arg>">,		HelpText<"Pass -b <arg> to the linker on AIX">, MetaVarName<"<arg>">,
Group<Link_Group>;		Group<Link_Group>;

		defm offload_uniform_block : BoolFOption<"offload-uniform-block",
		LangOpts<"OffloadUniformBlock">, Default<"LangOpts->CUDA">,
		PosFlag<SetTrue, [CC1Option], "Assume">, NegFlag<SetFalse, [CC1Option], "Don't assume">,
		BothFlags<[], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>;

// OpenCL-only Options		// OpenCL-only Options
def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>, Flags<[CC1Option]>,		def cl_opt_disable : Flag<["-"], "cl-opt-disable">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. This option disables all optimizations. By default optimizations are enabled.">;		HelpText<"OpenCL only. This option disables all optimizations. By default optimizations are enabled.">;
def cl_strict_aliasing : Flag<["-"], "cl-strict-aliasing">, Group<opencl_Group>, Flags<[CC1Option]>,		def cl_strict_aliasing : Flag<["-"], "cl-strict-aliasing">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. This option is added for compatibility with OpenCL 1.0.">;		HelpText<"OpenCL only. This option is added for compatibility with OpenCL 1.0.">;
def cl_single_precision_constant : Flag<["-"], "cl-single-precision-constant">, Group<opencl_Group>, Flags<[CC1Option]>,		def cl_single_precision_constant : Flag<["-"], "cl-single-precision-constant">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. Treat double precision floating-point constant as single precision constant.">,		HelpText<"OpenCL only. Treat double precision floating-point constant as single precision constant.">,
MarshallingInfoFlag<LangOpts<"SinglePrecisionConstants">>;		MarshallingInfoFlag<LangOpts<"SinglePrecisionConstants">>;
Show All 19 Lines
def cl_std_EQ : Joined<["-"], "cl-std=">, Group<opencl_Group>, Flags<[CC1Option]>,		def cl_std_EQ : Joined<["-"], "cl-std=">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL language standard to compile for.">,		HelpText<"OpenCL language standard to compile for.">,
Values<"cl,CL,cl1.0,CL1.0,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++,clc++1.0,CLC++1.0,clc++2021,CLC++2021">;		Values<"cl,CL,cl1.0,CL1.0,cl1.1,CL1.1,cl1.2,CL1.2,cl2.0,CL2.0,cl3.0,CL3.0,clc++,CLC++,clc++1.0,CLC++1.0,clc++2021,CLC++2021">;
def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group>,		def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group<opencl_Group>,
HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;		HelpText<"OpenCL only. Allow denormals to be flushed to zero.">;
def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,		def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">,		HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">,
MarshallingInfoFlag<CodeGenOpts<"OpenCLCorrectlyRoundedDivSqrt">>;		MarshallingInfoFlag<CodeGenOpts<"OpenCLCorrectlyRoundedDivSqrt">>;
def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group<opencl_Group>, Flags<[CC1Option]>,		def cl_uniform_work_group_size : Flag<["-"], "cl-uniform-work-group-size">, Group<opencl_Group>, Flags<[CC1Option]>, Alias<foffload_uniform_block>,
HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">,		HelpText<"OpenCL only. Defines that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel">;
MarshallingInfoFlag<CodeGenOpts<"UniformWGSize">>;
def cl_no_stdinc : Flag<["-"], "cl-no-stdinc">, Group<opencl_Group>,		def cl_no_stdinc : Flag<["-"], "cl-no-stdinc">, Group<opencl_Group>,
HelpText<"OpenCL only. Disables all standard includes containing non-native compiler types and functions.">;		HelpText<"OpenCL only. Disables all standard includes containing non-native compiler types and functions.">;
def cl_ext_EQ : CommaJoined<["-"], "cl-ext=">, Group<opencl_Group>, Flags<[CC1Option]>,		def cl_ext_EQ : CommaJoined<["-"], "cl-ext=">, Group<opencl_Group>, Flags<[CC1Option]>,
HelpText<"OpenCL only. Enable or disable OpenCL extensions/optional features. The argument is a comma-separated "		HelpText<"OpenCL only. Enable or disable OpenCL extensions/optional features. The argument is a comma-separated "
"sequence of one or more extension names, each prefixed by '+' or '-'.">,		"sequence of one or more extension names, each prefixed by '+' or '-'.">,
MarshallingInfoStringVector<TargetOpts<"OpenCLExtensionsAsWritten">>;		MarshallingInfoStringVector<TargetOpts<"OpenCLExtensionsAsWritten">>;

def client__name : JoinedOrSeparate<["-"], "client_name">;		def client__name : JoinedOrSeparate<["-"], "client_name">;
▲ Show 20 Lines • Show All 123 Lines • ▼ Show 20 Lines	defm hip_fp32_correctly_rounded_divide_sqrt : BoolFOption<"hip-fp32-correctly-rounded-divide-sqrt",
"the program source are correctly rounded (HIP device compilation only)">>,		"the program source are correctly rounded (HIP device compilation only)">>,
ShouldParseIf<hip.KeyPath>;		ShouldParseIf<hip.KeyPath>;
defm hip_kernel_arg_name : BoolFOption<"hip-kernel-arg-name",		defm hip_kernel_arg_name : BoolFOption<"hip-kernel-arg-name",
CodeGenOpts<"HIPSaveKernelArgName">, DefaultFalse,		CodeGenOpts<"HIPSaveKernelArgName">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Specify">,		PosFlag<SetTrue, [CC1Option], "Specify">,
NegFlag<SetFalse, [], "Don't specify">,		NegFlag<SetFalse, [], "Don't specify">,
BothFlags<[], " that kernel argument names are preserved (HIP only)">>,		BothFlags<[], " that kernel argument names are preserved (HIP only)">>,
ShouldParseIf<hip.KeyPath>;		ShouldParseIf<hip.KeyPath>;
def hipspv_pass_plugin_EQ : Joined<["--"], "hipspv-pass-plugin=">,		def hipspv_pass_plugin_EQ : Joined<["--"], "hipspv-pass-plugin=">,
		arsenmUnsubmitted Done Reply Inline Actions Can we avoid adding yet another language flag for something that's reusable for everything? Is there an --offload-<something> ? arsenm: Can we avoid adding yet another language flag for something that's reusable for everything? Is…
		yaxunlAuthorUnsubmitted Done Reply Inline Actions Currently, the naming convention for shared CUDA/HIP language options is `-fgpu-` or `--gpu-`. The shared CUDA/HIP/OpenMP driver options are named `--offload-`. This option is named `-fhip-uniform-block` because AFAIK CUDA does not support non-uniform block size. If we want to make it a generic option, it should be named as `-fgpu-uniform-block` by the current naming convention, unless we want to change the naming convention for generic offloading language options. @tra What do you think? Thanks. yaxunl:* Currently, the naming convention for shared CUDA/HIP language options is `-fgpu-` or `--gpu-`.
		yaxunlAuthorUnsubmitted Done Reply Inline Actions I am thinking, maybe it is time to start moving towards the final direction. How about renaming it as `--offload-uniform-block` ? @MaskRay @tra yaxunl: I am thinking, maybe it is time to start moving towards the final direction. How about…
		scchanUnsubmitted Done Reply Inline Actions Don't we need a different default value for some languages like OpenCL? scchan: Don't we need a different default value for some languages like OpenCL?
		arsenmUnsubmitted Done Reply Inline Actions Yes, but opencl already has a spec'd flag for this. If we're making up a new one, it could be something generic that aliases the opencl one in that case. Plus the +/- value of a new flag should work (the CL one only goes in one direction) arsenm: Yes, but opencl already has a spec'd flag for this. If we're making up a new one, it could be…
Group<Link_Group>, MetaVarName<"<dsopath>">,		Group<Link_Group>, MetaVarName<"<dsopath>">,
HelpText<"path to a pass plugin for HIP to SPIR-V passes.">;		HelpText<"path to a pass plugin for HIP to SPIR-V passes.">;
defm gpu_allow_device_init : BoolFOption<"gpu-allow-device-init",		defm gpu_allow_device_init : BoolFOption<"gpu-allow-device-init",
LangOpts<"GPUAllowDeviceInit">, DefaultFalse,		LangOpts<"GPUAllowDeviceInit">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Allow">, NegFlag<SetFalse, [], "Don't allow">,		PosFlag<SetTrue, [CC1Option], "Allow">, NegFlag<SetFalse, [], "Don't allow">,
BothFlags<[], " device side init function in HIP (experimental)">>,		BothFlags<[], " device side init function in HIP (experimental)">>,
ShouldParseIf<hip.KeyPath>;		ShouldParseIf<hip.KeyPath>;
defm gpu_defer_diag : BoolFOption<"gpu-defer-diag",		defm gpu_defer_diag : BoolFOption<"gpu-defer-diag",
▲ Show 20 Lines • Show All 6,359 Lines • Show Last 20 Lines

clang/lib/CodeGen/CGCall.cpp

Show First 20 Lines • Show All 2,381 Lines • ▼ Show 20 Lines	if (TargetDecl) {
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {		if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
std::optional<unsigned> NumElemsParam;		std::optional<unsigned> NumElemsParam;
if (AllocSize->getNumElemsParam().isValid())		if (AllocSize->getNumElemsParam().isValid())
NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex();		NumElemsParam = AllocSize->getNumElemsParam().getLLVMIndex();
FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(),		FuncAttrs.addAllocSizeAttr(AllocSize->getElemSizeParam().getLLVMIndex(),
NumElemsParam);		NumElemsParam);
}		}

if (TargetDecl->hasAttr<OpenCLKernelAttr>()) {		if (TargetDecl->hasAttr<OpenCLKernelAttr>()) {
		scchanUnsubmitted Done Reply Inline Actions The block here needs to be aware of this new flag. scchan: The block here needs to be aware of this new flag.
		yaxunlAuthorUnsubmitted Done Reply Inline Actions Now that `-foffload-uniform-block` has the same default value as `-cl-uniform-work-group-size` for OpenCL, we can make `-cl-uniform-work-group-size` an alias to `-foffload-uniform-block`. Will update. yaxunl: Now that `-foffload-uniform-block` has the same default value as `-cl-uniform-work-group-size`…
if (getLangOpts().OpenCLVersion <= 120) {		if (getLangOpts().OpenCLVersion <= 120) {
// OpenCL v1.2 Work groups are always uniform		// OpenCL v1.2 Work groups are always uniform
FuncAttrs.addAttribute("uniform-work-group-size", "true");		FuncAttrs.addAttribute("uniform-work-group-size", "true");
} else {		} else {
// OpenCL v2.0 Work groups may be either uniform or not.		// OpenCL v2.0 Work groups may be either uniform or not.
// '-cl-uniform-work-group-size' compile option gets a hint		// '-cl-uniform-work-group-size' compile option gets a hint
// to the compiler that the global work-size be a multiple of		// to the compiler that the global work-size be a multiple of
// the work-group size specified to clEnqueueNDRangeKernel		// the work-group size specified to clEnqueueNDRangeKernel
// (i.e. work groups are uniform).		// (i.e. work groups are uniform).
FuncAttrs.addAttribute("uniform-work-group-size",		FuncAttrs.addAttribute(
llvm::toStringRef(CodeGenOpts.UniformWGSize));		"uniform-work-group-size",
		llvm::toStringRef(getLangOpts().OffloadUniformBlock));
}		}
}		}

		if (TargetDecl->hasAttr<CUDAGlobalAttr>() &&
		getLangOpts().OffloadUniformBlock)
		FuncAttrs.addAttribute("uniform-work-group-size", "true");
}		}

// Attach "no-builtins" attributes to:		// Attach "no-builtins" attributes to:
// * call sites: both `nobuiltin` and "no-builtins" or "no-builtin-<name>".		// * call sites: both `nobuiltin` and "no-builtins" or "no-builtin-<name>".
// * definitions: "no-builtins" or "no-builtin-<name>" only.		// * definitions: "no-builtins" or "no-builtin-<name>" only.
// The attributes can come from:		// The attributes can come from:
// * LangOpts: -ffreestanding, -fno-builtin, -fno-builtin-<name>		// * LangOpts: -ffreestanding, -fno-builtin, -fno-builtin-<name>
// * FunctionDecl attributes: __attribute__((no_builtin(...)))		// * FunctionDecl attributes: __attribute__((no_builtin(...)))
▲ Show 20 Lines • Show All 3,406 Lines • Show Last 20 Lines

clang/lib/CodeGen/Targets/AMDGPU.cpp

Show First 20 Lines • Show All 360 Lines • ▼ Show 20 Lines	void AMDGPUTargetCodeGenInfo::setTargetAttributes(
llvm::Function *F = dyn_cast<llvm::Function>(GV);		llvm::Function *F = dyn_cast<llvm::Function>(GV);
if (!F)		if (!F)
return;		return;

const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);		const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (FD)		if (FD)
setFunctionDeclAttributes(FD, F, M);		setFunctionDeclAttributes(FD, F, M);

const bool IsHIPKernel =
M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();

// TODO: This should be moved to language specific attributes instead.
if (IsHIPKernel)
F->addFnAttr("uniform-work-group-size", "true");

if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())		if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");		F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");

if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)		if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
F->addFnAttr("amdgpu-ieee", "false");		F->addFnAttr("amdgpu-ieee", "false");
}		}

unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {		unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
▲ Show 20 Lines • Show All 224 Lines • Show Last 20 Lines

clang/lib/Driver/ToolChains/Clang.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,274 Lines • ▼ Show 20 Lines	Args.claimAllArgs(options::OPT_fgpu_approx_transcendentals,
options::OPT_fno_gpu_approx_transcendentals);		options::OPT_fno_gpu_approx_transcendentals);
}		}

if (IsHIP) {		if (IsHIP) {
CmdArgs.push_back("-fcuda-allow-variadic-functions");		CmdArgs.push_back("-fcuda-allow-variadic-functions");
Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ);		Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ);
}		}

		Args.AddLastArg(CmdArgs, options::OPT_foffload_uniform_block,
		options::OPT_fno_offload_uniform_block);
		MaskRayUnsubmitted Done Reply Inline Actions Why is the -Wunused-command-line-argument warning suppressed in non-IsHIP mode? MaskRay: Why is the -Wunused-command-line-argument warning suppressed in non-IsHIP mode?
		yaxunlAuthorUnsubmitted Done Reply Inline Actions Users may want to add these options to clang config file. Is there a general rule which options should be claimed? yaxunl: Users may want to add these options to clang config file. Is there a general rule which…
		MaskRayUnsubmitted Done Reply Inline Actions Options in a configuration file are automatically claimed. I don't know a general rule, but we generally don't claim newly introduced options. MaskRay: Options in a configuration file are automatically claimed. I don't know a general rule, but we…
		yaxunlAuthorUnsubmitted Done Reply Inline Actions I think I should remove the claimAllArgs for this option. It should behave like the usual options when not used. yaxunl: I think I should remove the claimAllArgs for this option. It should behave like the usual…

if (IsCudaDevice \|\| IsHIPDevice) {		if (IsCudaDevice \|\| IsHIPDevice) {
StringRef InlineThresh =		StringRef InlineThresh =
Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ);		Args.getLastArgValue(options::OPT_fgpu_inline_threshold_EQ);
if (!InlineThresh.empty()) {		if (!InlineThresh.empty()) {
std::string ArgStr =		std::string ArgStr =
std::string("-inline-threshold=") + InlineThresh.str();		std::string("-inline-threshold=") + InlineThresh.str();
CmdArgs.append({"-mllvm", Args.MakeArgStringRef(ArgStr)});		CmdArgs.append({"-mllvm", Args.MakeArgStringRef(ArgStr)});
}		}
▲ Show 20 Lines • Show All 1,433 Lines • Show Last 20 Lines

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

	// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \			// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
	// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \			// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
	// RUN: \| FileCheck -check-prefixes=CHECK,DEFAULT %s			// RUN: \| FileCheck -check-prefixes=CHECK,DEFAULT %s
	// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa --gpu-max-threads-per-block=1024 \			// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa --gpu-max-threads-per-block=1024 \
	// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \			// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
	// RUN: \| FileCheck -check-prefixes=CHECK,MAX1024 %s			// RUN: \| FileCheck -check-prefixes=CHECK,MAX1024 %s
	// RUN: %clang_cc1 -triple nvptx \			// RUN: %clang_cc1 -triple nvptx \
	// RUN: -fcuda-is-device -emit-llvm -o - %s \| FileCheck %s \			// RUN: -fcuda-is-device -emit-llvm -o - %s \| FileCheck %s \
	// RUN: -check-prefix=NAMD			// RUN: -check-prefix=NAMD
	// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \			// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
	// RUN: -verify -o - -x hip %s \| FileCheck -check-prefix=NAMD %s			// RUN: -verify -o - -x hip %s \| FileCheck -check-prefix=NAMD %s

				// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -foffload-uniform-block \
				// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
				// RUN: \| FileCheck -check-prefixes=CHECK,DEFAULT %s
				// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fno-offload-uniform-block \
				// RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
				// RUN: \| FileCheck -check-prefixes=NOUB %s

	#include "Inputs/cuda.h"			#include "Inputs/cuda.h"

	__global__ void flat_work_group_size_default() {			__global__ void flat_work_group_size_default() {
	// CHECK: define{{.*}} amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]			// CHECK: define{{.*}} amdgpu_kernel void @_Z28flat_work_group_size_defaultv() [[FLAT_WORK_GROUP_SIZE_DEFAULT:#[0-9]+]]
				// NOUB: define{{.*}} void @_Z28flat_work_group_size_defaultv() [[NOUB:#[0-9]+]]
	}			}

	__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics			__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
	__global__ void flat_work_group_size_32_64() {			__global__ void flat_work_group_size_32_64() {
	// CHECK: define{{.*}} amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]			// CHECK: define{{.*}} amdgpu_kernel void @_Z26flat_work_group_size_32_64v() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
	}			}
	__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics			__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
	__global__ void waves_per_eu_2() {			__global__ void waves_per_eu_2() {
	Show All 15 Lines
	// NAMD-NOT: "amdgpu-num-sgpr"			// NAMD-NOT: "amdgpu-num-sgpr"

	// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.}}"amdgpu-flat-work-group-size"="1,1024"{{.}}"uniform-work-group-size"="true"			// DEFAULT-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.}}"amdgpu-flat-work-group-size"="1,1024"{{.}}"uniform-work-group-size"="true"
	// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"			// MAX1024-DAG: attributes [[FLAT_WORK_GROUP_SIZE_DEFAULT]] = {{.*}}"amdgpu-flat-work-group-size"="1,1024"
	// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"			// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}}"amdgpu-flat-work-group-size"="32,64"
	// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"			// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}}"amdgpu-waves-per-eu"="2"
	// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"			// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}}"amdgpu-num-sgpr"="32"
	// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"			// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}}"amdgpu-num-vgpr"="64"

				// NOUB-NOT: "uniform-work-group-size"="true"

clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl

	// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 \| FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM			// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL1.2 -o - %s 2>&1 \| FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
	// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 \| FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM			// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -o - %s 2>&1 \| FileCheck %s -check-prefixes CHECK,CHECK-NONUNIFORM
	// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 \| FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM			// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -cl-uniform-work-group-size -o - %s 2>&1 \| FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM
				// RUN: %clang_cc1 -emit-llvm -O0 -cl-std=CL2.0 -foffload-uniform-block -o - %s 2>&1 \| FileCheck %s -check-prefixes CHECK,CHECK-UNIFORM

	kernel void ker() {};			kernel void ker() {};
	// CHECK: define{{.*}}@ker() #0			// CHECK: define{{.*}}@ker() #0

	void foo() {};			void foo() {};
	// CHECK: define{{.*}}@foo() #1			// CHECK: define{{.*}}@foo() #1

	// CHECK-LABEL: attributes #0			// CHECK-LABEL: attributes #0
	// CHECK-UNIFORM: "uniform-work-group-size"="true"			// CHECK-UNIFORM: "uniform-work-group-size"="true"
	// CHECK-NONUNIFORM: "uniform-work-group-size"="false"			// CHECK-NONUNIFORM: "uniform-work-group-size"="false"

	// CHECK-LABEL: attributes #1			// CHECK-LABEL: attributes #1
	// CHECK-NOT: uniform-work-group-size			// CHECK-NOT: uniform-work-group-size

clang/test/Driver/hip-options.hip

	Show First 20 Lines • Show All 199 Lines • ▼ Show 20 Lines

	// Check no warnings for -fgpu-approx-transcendentals.			// Check no warnings for -fgpu-approx-transcendentals.

	// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-approx-transcendentals \			// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-approx-transcendentals \
	// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 \| count 0			// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 \| count 0

	// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nostdinc -nostdlib -fgpu-approx-transcendentals \			// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nostdinc -nostdlib -fgpu-approx-transcendentals \
	// RUN: -x c++ %s 2>&1 \| count 0			// RUN: -x c++ %s 2>&1 \| count 0
				// Check -f[no-]offload-uniform-block is passed to clang -cc1 when given
				// explicitly, and that neither form is passed by default.

				// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
				// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 \| FileCheck -check-prefix=NOUNIBLK %s

				// NOUNIBLK: "-cc1"{{.}} "-triple" "amdgcn-amd-amdhsa" {{.}} "-fno-offload-uniform-block"
				// NOUNIBLK: "-cc1"{{.}} "-triple" "x86_64-unknown-linux-gnu" {{.}} "-fno-offload-uniform-block"

				// RUN: %clang -### -nogpuinc -nogpulib -foffload-uniform-block \
				// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 \| FileCheck -check-prefix=UNIBLK %s

				// UNIBLK: "-cc1"{{.}} "-triple" "amdgcn-amd-amdhsa" {{.}} "-foffload-uniform-block"
				// UNIBLK: "-cc1"{{.}} "-triple" "x86_64-unknown-linux-gnu" {{.}} "-foffload-uniform-block"

				// RUN: %clang -### -nogpuinc -nogpulib \
				// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 \| FileCheck -check-prefix=DEFUNIBLK %s

				// DEFUNIBLK-NOT: "-f{{(no-)?}}offload-uniform-block"

				// Check no warnings for -f[no-]offload-uniform-block.

				// RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
				// RUN: -foffload-uniform-block --cuda-gpu-arch=gfx906 %s 2>&1 \| count 0

clang/test/Driver/opencl.cl

	Show All 11 Lines
	// RUN: %clang -S -### -cl-kernel-arg-info %s 2>&1 \| FileCheck --check-prefix=CHECK-KERNEL-ARG-INFO %s			// RUN: %clang -S -### -cl-kernel-arg-info %s 2>&1 \| FileCheck --check-prefix=CHECK-KERNEL-ARG-INFO %s
	// RUN: %clang -S -### -cl-unsafe-math-optimizations %s 2>&1 \| FileCheck --check-prefix=CHECK-UNSAFE-MATH-OPT %s			// RUN: %clang -S -### -cl-unsafe-math-optimizations %s 2>&1 \| FileCheck --check-prefix=CHECK-UNSAFE-MATH-OPT %s
	// RUN: %clang -S -### -cl-fast-relaxed-math %s 2>&1 \| FileCheck --check-prefix=CHECK-FAST-RELAXED-MATH %s			// RUN: %clang -S -### -cl-fast-relaxed-math %s 2>&1 \| FileCheck --check-prefix=CHECK-FAST-RELAXED-MATH %s
	// RUN: %clang -S -### -cl-mad-enable %s 2>&1 \| FileCheck --check-prefix=CHECK-MAD-ENABLE %s			// RUN: %clang -S -### -cl-mad-enable %s 2>&1 \| FileCheck --check-prefix=CHECK-MAD-ENABLE %s
	// RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 \| FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s			// RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 \| FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s
	// RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 \| FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s			// RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 \| FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s
	// RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 \| FileCheck --check-prefix=CHECK-ROUND-DIV %s			// RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 \| FileCheck --check-prefix=CHECK-ROUND-DIV %s
	// RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 \| FileCheck --check-prefix=CHECK-UNIFORM-WG %s			// RUN: %clang -S -### -cl-uniform-work-group-size %s 2>&1 \| FileCheck --check-prefix=CHECK-UNIFORM-WG %s
				// RUN: %clang -S -### -foffload-uniform-block %s 2>&1 \| FileCheck --check-prefix=CHECK-UNIFORM-WG %s
				// RUN: %clang -S -### -fno-offload-uniform-block -cl-uniform-work-group-size %s 2>&1 \| FileCheck --check-prefix=CHECK-UNIFORM-WG %s
	// RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 \| FileCheck --check-prefix=CHECK-C99 %s			// RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 \| FileCheck --check-prefix=CHECK-C99 %s
	// RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 \| FileCheck --check-prefix=CHECK-INVALID %s			// RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 \| FileCheck --check-prefix=CHECK-INVALID %s
	// RUN: %clang -S -### -target spir-unknown-unknown %s 2>&1 \| FileCheck --check-prefix=CHECK-W-SPIR-COMPAT %s			// RUN: %clang -S -### -target spir-unknown-unknown %s 2>&1 \| FileCheck --check-prefix=CHECK-W-SPIR-COMPAT %s
	// RUN: %clang -S -### -target amdgcn-amd-amdhsa-opencl %s 2>&1 \| FileCheck --check-prefix=CHECK-NO-W-SPIR-COMPAT %s			// RUN: %clang -S -### -target amdgcn-amd-amdhsa-opencl %s 2>&1 \| FileCheck --check-prefix=CHECK-NO-W-SPIR-COMPAT %s
	// RUN: %clang -S -### -cl-ext="+test_ext" %s 2>&1 \| FileCheck --check-prefix=CHECK-EXT %s			// RUN: %clang -S -### -cl-ext="+test_ext" %s 2>&1 \| FileCheck --check-prefix=CHECK-EXT %s

	// CHECK-CL: "-cc1" {{.*}} "-cl-std=CL"			// CHECK-CL: "-cc1" {{.*}} "-cl-std=CL"
	// CHECK-CL10: "-cc1" {{.*}} "-cl-std=CL1.0"			// CHECK-CL10: "-cc1" {{.*}} "-cl-std=CL1.0"
	Show All 11 Lines
	// CHECK-FAST-RELAXED-MATH: "-cc1" {{.*}} "-cl-fast-relaxed-math"			// CHECK-FAST-RELAXED-MATH: "-cc1" {{.*}} "-cl-fast-relaxed-math"
	// CHECK-MAD-ENABLE: "-cc1" {{.*}} "-cl-mad-enable"			// CHECK-MAD-ENABLE: "-cc1" {{.*}} "-cl-mad-enable"
	// CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros"			// CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros"

	// This is not forwarded			// This is not forwarded
	// CHECK-DENORMS-ARE-ZERO-NOT: "-cl-denorms-are-zero"			// CHECK-DENORMS-ARE-ZERO-NOT: "-cl-denorms-are-zero"

	// CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt"			// CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt"
	// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-cl-uniform-work-group-size"			// CHECK-UNIFORM-WG: "-cc1" {{.*}} "-foffload-uniform-block"
	// CHECK-C99: error: invalid value 'c99' in '-cl-std=c99'			// CHECK-C99: error: invalid value 'c99' in '-cl-std=c99'
	// CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'			// CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'

	// CHECK-W-SPIR-COMPAT: "-Wspir-compat"			// CHECK-W-SPIR-COMPAT: "-Wspir-compat"
	// CHECK-NO-W-SPIR-COMPAT-NOT: "-Wspir-compat"			// CHECK-NO-W-SPIR-COMPAT-NOT: "-Wspir-compat"

	// CHECK-EXT: "-cc1" {{.*}} "-cl-ext=+test_ext"			// CHECK-EXT: "-cc1" {{.*}} "-cl-ext=+test_ext"

	kernel void func(void);			kernel void func(void);

This is an archive of the discontinued LLVM Phabricator instance.

[Driver] Add `-f[no-]offload-uniform-block`
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 544910

clang/include/clang/Basic/CodeGenOptions.def

clang/include/clang/Basic/LangOptions.def

clang/include/clang/Driver/Options.td

clang/lib/CodeGen/CGCall.cpp

clang/lib/CodeGen/Targets/AMDGPU.cpp

clang/lib/Driver/ToolChains/Clang.cpp

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl

clang/test/Driver/hip-options.hip

clang/test/Driver/opencl.cl

This is an archive of the discontinued LLVM Phabricator instance.

[Driver] Add `-f[no-]offload-uniform-block`ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 544910

clang/include/clang/Basic/CodeGenOptions.def

clang/include/clang/Basic/LangOptions.def

clang/include/clang/Driver/Options.td

clang/lib/CodeGen/CGCall.cpp

clang/lib/CodeGen/Targets/AMDGPU.cpp

clang/lib/Driver/ToolChains/Clang.cpp

clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu

clang/test/CodeGenOpenCL/cl-uniform-wg-size.cl

clang/test/Driver/hip-options.hip

clang/test/Driver/opencl.cl

[Driver] Add `-f[no-]offload-uniform-block`
ClosedPublic