Changeset View
Standalone View
clang/lib/Sema/SemaCUDA.cpp
Show All 11 Lines | |||||
#include "clang/AST/ASTContext.h" | #include "clang/AST/ASTContext.h" | ||||
#include "clang/AST/Decl.h" | #include "clang/AST/Decl.h" | ||||
#include "clang/AST/ExprCXX.h" | #include "clang/AST/ExprCXX.h" | ||||
#include "clang/Basic/Cuda.h" | #include "clang/Basic/Cuda.h" | ||||
#include "clang/Basic/TargetInfo.h" | #include "clang/Basic/TargetInfo.h" | ||||
#include "clang/Lex/Preprocessor.h" | #include "clang/Lex/Preprocessor.h" | ||||
#include "clang/Sema/Lookup.h" | #include "clang/Sema/Lookup.h" | ||||
#include "clang/Sema/ScopeInfo.h" | |||||
#include "clang/Sema/Sema.h" | #include "clang/Sema/Sema.h" | ||||
#include "clang/Sema/SemaDiagnostic.h" | #include "clang/Sema/SemaDiagnostic.h" | ||||
#include "clang/Sema/SemaInternal.h" | #include "clang/Sema/SemaInternal.h" | ||||
#include "clang/Sema/Template.h" | #include "clang/Sema/Template.h" | ||||
#include "llvm/ADT/Optional.h" | #include "llvm/ADT/Optional.h" | ||||
#include "llvm/ADT/SmallVector.h" | #include "llvm/ADT/SmallVector.h" | ||||
using namespace clang; | using namespace clang; | ||||
▲ Show 20 Lines • Show All 713 Lines • ▼ Show 20 Lines | DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this) | ||||
<< IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); | << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller); | ||||
DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, | DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl, | ||||
Caller, *this) | Caller, *this) | ||||
<< Callee; | << Callee; | ||||
return DiagKind != DeviceDiagBuilder::K_Immediate && | return DiagKind != DeviceDiagBuilder::K_Immediate && | ||||
DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack; | DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack; | ||||
} | } | ||||
// Check the wrong-sided reference capture of lambda for CUDA/HIP. | |||||
tra: What does the return value mean? We don't seem to check it anyways. If we don't care about the… | |||||
it should return void. fixed. yaxunl: it should return void. fixed. | |||||
// A lambda function may capture a stack variable by reference when it is | |||||
// defined and uses the capture by reference when the lambda is called. When | |||||
// the capture and use happen on different sides, the capture is invalid and | |||||
// should be diagnosed. | |||||
void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee, | |||||
const sema::Capture &Capture) { | |||||
// In host compilation we only need to check lambda functions emitted on host | |||||
// side. In such lambda functions, a reference capture is invalid only | |||||
// if the lambda structure is populated by a device function or kernel then | |||||
I don't think this is completely correct. Postponed diags get emitted if we know we're attempoting to codegen wrong things. I don't think that we can reason that checks can be done on the host side only, based only on the argument you're making above (at least based on the way I understand it). The point you're making below that a captured lambda created by device code can't ever be used by the host code is probably a better argument why the check may not be necessary. tra: I don't think this is completely correct. Postponed diags get emitted if we know we're… | |||||
Revised the comments. yaxunl: Revised the comments. | |||||
// is passed to and called by a host function. However that is impossible, | |||||
// since a device function or kernel can only call a device function, also a | |||||
// kernel cannot pass a lambda back to a host function since we cannot | |||||
// define a kernel argument type which can hold the lambda before the lambda | |||||
// itself is defined. | |||||
if (!LangOpts.CUDAIsDevice) | |||||
return; | |||||
// File-scope lambda can only do init captures for global variables, which | |||||
// results in passing by value for these global variables. | |||||
FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext); | |||||
if (!Caller) | |||||
return; | |||||
// In device compilation, we only need to check lambda functions which are | |||||
// emitted on device side. For such lambdas, a reference capture is invalid | |||||
// only if the lambda structure is populated by a host function then passed | |||||
// to and called in a device function or kernel. | |||||
bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>(); | |||||
bool CallerIsHost = | |||||
!Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>(); | |||||
bool ShouldCheck = CalleeIsDevice && CallerIsHost; | |||||
if (!ShouldCheck || !Capture.isReferenceCapture()) | |||||
return; | |||||
auto DiagKind = DeviceDiagBuilder::K_Deferred; | |||||
if (Capture.isVariableCapture()) { | |||||
DeviceDiagBuilder(DiagKind, Capture.getLocation(), | |||||
diag::err_capture_bad_target, Callee, *this) | |||||
<< Capture.getVariable(); | |||||
} else if (Capture.isThisCapture()) { | |||||
DeviceDiagBuilder(DiagKind, Capture.getLocation(), | |||||
diag::err_capture_bad_target_this_ptr, Callee, *this); | |||||
} | |||||
return; | |||||
} | |||||
void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { | void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { | ||||
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); | ||||
if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) | if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) | ||||
return; | return; | ||||
FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext); | |||||
if (!CurFn) | |||||
return; | |||||
CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); | |||||
if (Target == CFT_Global || Target == CFT_Device) { | |||||
Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); | |||||
} else if (Target == CFT_HostDevice) { | |||||
Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); | Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); | ||||
This check appears to prevent lambdas appearing in any context outside a function from being implicitly HD. Is that what you want? Eg: auto foo = [] {}; // not implicitly HD rsmith: This check appears to prevent lambdas appearing in any context outside a function from being… | |||||
There should at least be a flag to enable capturing lambdas to be implicitly HD. I dont really understand the rational for making capturing lambdas not implicitly HD. It seems like its trying to prevent using an address to host on the device, but I dont see how this prevents that at all. This will also break the compilation in rocm. Should we use a fork of llvm to compile rocm? pfultz2: There should at least be a flag to enable capturing lambdas to be implicitly HD. I dont really… | |||||
Could you give an example to demonstrate current use and how it will break? My understanding that the patch *relaxes* the restrictions on lambdas so in theory not promoting capturing lambdas preserves the status quo. As for the fork, my response would be an empathic "no, please don't do it". Fork == different compiler == showstopper for various use cases. It would definitely be an issue for us at Google. Considering that we're still probing our way towards making lambdas more useful, it may be a bit premature to heavily depend on any particular implementation detail of an experimental feature, even if it happens to work. We'll need to figure out an approach that will be sustainable long-term and forked compiler is a rather large and hard-to-maintain hammer for this. In my experience, adapting source code ends up being more manageable long-term. tra: @pfultz2:
> This will also break the compilation in rocm. Should we use a fork of llvm to… | |||||
Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); | Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); | ||||
What about __global__ lambdas? We probably don't want to add HD attributes on them here. tra: What about `__global__` lambdas? We probably don't want to add HD attributes on them here. | |||||
lambda is not allowed to be kernel. I will add a lit test for that. yaxunl: lambda is not allowed to be kernel. I will add a lit test for that. | |||||
} | } | ||||
Shouldn't we add these attributes if there are no host and device attributes? This seems like it will treat []() __device__ {} as host device. pfultz2: Shouldn't we add these attributes if there are no host and device attributes? This seems like… | |||||
There is check on line 716. We only reach here if there is no device and host attrs on the function. yaxunl: There is check on line 716. We only reach here if there is no device and host attrs on the… | |||||
The reference captures check seems quite strange to me. A copy capture of a pointer could have the same problem, as could a copy capture of a class that contains a reference or a pointer. As could an init-capture. These kinds of quirky language rules are usually more trouble than they're worth. rsmith: The reference captures check seems quite strange to me. A copy capture of a pointer could have… | |||||
Capturing by value is not always an error, only when copying a pointer to a host variable. but this requires a lot more static analysis to diagnose. However, capturing by reference is almost always wrong(at least with the current HIP) when the context is host and the lambda is called on the device. Therefore, we avoid this scenario by not making such lambdas implicitly HD, but the error message may not be quite as clear. It is a quirky language rule, and we could remove this restriction and rely on a warning or static analysis to diagnose the issue. pfultz2: Capturing by value is not always an error, only when copying a pointer to a host variable. but… | |||||
} | |||||
void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, | void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, | ||||
const LookupResult &Previous) { | const LookupResult &Previous) { | ||||
assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); | ||||
CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); | CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); | ||||
for (NamedDecl *OldND : Previous) { | for (NamedDecl *OldND : Previous) { | ||||
FunctionDecl *OldFD = OldND->getAsFunction(); | FunctionDecl *OldFD = OldND->getAsFunction(); | ||||
if (!OldFD) | if (!OldFD) | ||||
▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines |
What does the return value mean? We don't seem to check it anyways. If we don't care about the result, perhaps the function should be void.
If we do, then it would be good to document its purpose and returned values and, probably, rename it to better indicate what is it it's supposed to check.