@@ -1383,15 +1383,36 @@ isl_bool collectReferencesInGPUStmt(__isl_keep isl_ast_node *Node, void *User) {
1383
1383
1384
1384
// / A list of functions that are available in NVIDIA's libdevice.
// / Names are stored without the `__nv_` prefix; getCUDALibDeviceFuntion
// / prepends it when it builds the final libdevice name.
1385
1385
const std::set<std::string> CUDALibDeviceFunctions = {
1386
- " exp" , " expf" , " expl" , " cos" , " cosf" , " sqrt" ,
1387
- " sqrtf" , " copysign" , " copysignf" , " copysignl" , " log" , " logf" };
1386
+ " exp" , " expf" , " expl" , " cos" , " cosf" , " sqrt" , " sqrtf" ,
1387
+ " copysign" , " copysignf" , " copysignl" , " log" , " logf" , " powi" , " powif" };
1388
+
1389
+ // A map from LLVM intrinsic names to the names of their corresponding
// libdevice functions. The mapped names carry no `__nv_` prefix, so they can
// be looked up in CUDALibDeviceFunctions.
1390
+ const std::map<std::string, std::string> IntrinsicToLibdeviceFunc = {
1391
+ {" llvm.exp.f64" , " exp" },
1392
+ {" llvm.exp.f32" , " expf" },
1393
+ {" llvm.powi.f64" , " powi" },
1394
+ {" llvm.powi.f32" , " powif" }};
1388
1395
1389
1396
// / Return the corresponding CUDA libdevice function name for @p F.
1397
+ // / Note that this function will try to convert intrinsics in the list
1398
+ // / IntrinsicToLibdeviceFunc into libdevice functions.
1399
+ // / This is because some intrinsics such as `exp`
1400
+ // / are not supported by the NVPTX backend.
1401
+ // / If this restriction of the backend is lifted, we should refactor our code
1402
+ // / so that we use intrinsics whenever possible.
1390
1403
// /
1391
1404
// / Return "" if no libdevice function corresponds to @p F.
1392
1405
std::string getCUDALibDeviceFuntion (Function *F) {
1393
- if (CUDALibDeviceFunctions.count (F->getName ()))
1394
- return std::string (" __nv_" ) + std::string (F->getName ());
// Translate an intrinsic name to its libdevice base name; any other name is
// used unchanged.
1406
+ const std::string FnName = [&] {
1407
+ auto It = IntrinsicToLibdeviceFunc.find (F->getName ());
1408
+ if (It != IntrinsicToLibdeviceFunc.end ())
1409
+ return It->second ;
1410
+
1411
+ return std::string (F->getName ());
1412
+ }();
1413
+
// Only names listed in CUDALibDeviceFunctions receive the `__nv_` prefix.
1414
+ if (CUDALibDeviceFunctions.count (FnName))
1415
+ return " __nv_" + FnName;
1395
1416
1396
1417
return " " ;
1397
1418
}
@@ -1409,7 +1430,7 @@ static bool isValidFunctionInKernel(llvm::Function *F, bool AllowLibDevice) {
1409
1430
1410
1431
return F->isIntrinsic () &&
1411
1432
(Name.startswith (" llvm.sqrt" ) || Name.startswith (" llvm.fabs" ) ||
1412
- Name.startswith (" llvm.copysign" ) || Name. startswith ( " llvm.powi " ) );
1433
+ Name.startswith (" llvm.copysign" ));
1413
1434
}
1414
1435
1415
1436
// / Do not take `Function` as a subtree value.
@@ -2362,9 +2383,22 @@ bool GPUNodeBuilder::requiresCUDALibDevice() {
2362
2383
if (!F.isDeclaration ())
2363
2384
continue ;
2364
2385
2365
- std::string CUDALibDeviceFunc = getCUDALibDeviceFuntion (&F);
2386
+ const std::string CUDALibDeviceFunc = getCUDALibDeviceFuntion (&F);
2366
2387
if (CUDALibDeviceFunc.length () != 0 ) {
2367
- F.setName (CUDALibDeviceFunc);
2388
+ // We need to handle the case where a module looks like this:
2389
+ // @expf(..)
2390
+ // @llvm.exp.f32(..)
2391
+ // Both of these functions would be renamed to `__nv_expf`.
2392
+ //
2393
+ // So, we must first check for the existence of the libdevice function.
2394
+ // If this exists, we replace our current function with it.
2395
+ //
2396
+ // If it does not exist, we rename the current function to the
2397
+ // libdevice function name.
2398
+ if (Function *Replacement = F.getParent ()->getFunction (CUDALibDeviceFunc))
2399
+ F.replaceAllUsesWith (Replacement);
2400
+ else
2401
+ F.setName (CUDALibDeviceFunc);
2368
2402
RequiresLibDevice = true ;
2369
2403
}
2370
2404
}
0 commit comments