diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -814,6 +814,9 @@ Linkage, /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__hip_gpubin_handle"); + if (Linkage == llvm::GlobalValue::LinkOnceAnyLinkage) + GpuBinaryHandle->setComdat( + CGM.getModule().getOrInsertComdat(GpuBinaryHandle->getName())); GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign()); // Prevent the weak symbol in different shared libraries being merged. if (Linkage != llvm::GlobalValue::InternalLinkage) diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -183,8 +183,7 @@ const InputInfoList &Inputs, const ArgList &Args, const JobAction &JA) const { const ToolChain &TC = getToolChain(); - std::string Name = - std::string(llvm::sys::path::stem(Output.getFilename())); + std::string Name = std::string(llvm::sys::path::stem(Output.getFilename())); // Create Temp Object File Generator, // Offload Bundled file and Bundled Object file. @@ -206,20 +205,29 @@ std::string ObjBuffer; llvm::raw_string_ostream ObjStream(ObjBuffer); + auto HostTriple = + C.getSingleOffloadToolChain()->getTriple(); + // Add MC directives to embed target binaries. We ensure that each // section and image is 16-byte aligned. This is not mandatory, but // increases the likelihood of data to be aligned with a cache block // in several main host machines. ObjStream << "# HIP Object Generator\n"; ObjStream << "# *** Automatically generated by Clang ***\n"; - ObjStream << " .protected __hip_fatbin\n"; - ObjStream << " .type __hip_fatbin,@object\n"; - ObjStream << " .section .hip_fatbin,\"a\",@progbits\n"; + if (HostTriple.isWindowsMSVCEnvironment()) { + ObjStream << " .section .hip_fatbin, \"dw\"\n"; + } else { + ObjStream << " .protected __hip_fatbin\n"; + ObjStream << " .type __hip_fatbin,@object\n"; + ObjStream << " .section .hip_fatbin,\"a\",@progbits\n"; + } ObjStream << " .globl __hip_fatbin\n"; ObjStream << " .p2align " << llvm::Log2(llvm::Align(HIPCodeObjectAlign)) << "\n"; ObjStream << "__hip_fatbin:\n"; - ObjStream << " .incbin \"" << BundleFile << "\"\n"; + ObjStream << " .incbin "; + llvm::sys::printArg(ObjStream, BundleFile, /*Quote=*/true); + ObjStream << "\n"; ObjStream.flush(); // Dump the contents of the temp object file gen if the user requested that. @@ -238,7 +246,8 @@ Objf << ObjBuffer; - ArgStringList McArgs{"-o", Output.getFilename(), + ArgStringList McArgs{"-triple", Args.MakeArgString(HostTriple.normalize()), + "-o", Output.getFilename(), McinFile, "--filetype=obj"}; const char *Mc = Args.MakeArgString(TC.GetProgramPath("llvm-mc")); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::None(), diff --git a/clang/test/CodeGenCUDA/device-stub.cu b/clang/test/CodeGenCUDA/device-stub.cu --- a/clang/test/CodeGenCUDA/device-stub.cu +++ b/clang/test/CodeGenCUDA/device-stub.cu @@ -57,8 +57,14 @@ // RUN: -fcuda-include-gpubinary %t -o - -x hip\ // RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -aux-triple amdgcn -emit-llvm %s \ +// RUN: -o - -x hip\ +// RUN: | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=ALL,WIN,HIP,HIPNEF + #include "Inputs/cuda.h" +// HIPNEF: $__hip_gpubin_handle = comdat any + #ifndef NOGLOBALS // NORDC-DAG: @device_var = internal global i32 // RDC-DAG: @device_var = global i32 diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -11,14 +11,31 @@ // RUN: -fhip-dump-offload-linker-script \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ -// RUN: 2>&1 | FileCheck %s +// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LNX %s + +// RUN: %clang -### -target x86_64-pc-windows-msvc \ +// RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ +// RUN: --hip-device-lib=lib1.bc --hip-device-lib=lib2.bc \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib1 \ +// RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ +// RUN: -fuse-ld=lld -fgpu-rdc -nogpuinc \ +// RUN: -fhip-dump-offload-linker-script \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck -check-prefixes=CHECK,MSVC %s // check code object alignment in dumped llvm-mc input -// CHECK: .protected __hip_fatbin +// LNX: .protected __hip_fatbin +// LNX: .type __hip_fatbin,@object +// LNX: .section .hip_fatbin,"a",@progbits +// MSVC: .section .hip_fatbin, "dw" +// CHECK: .globl __hip_fatbin // CHECK: .p2align 12 +// CHECK: __hip_fatbin: +// CHECK: .incbin "[[BUNDLE:.*hipfb]]" // emit objects for host side path -// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" [[HOST:"x86_64-[^"]+"]] // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" @@ -26,7 +43,7 @@ // CHECK-SAME: {{.*}} "-o" [[A_OBJ_HOST:".*o"]] "-x" "hip" // CHECK-SAME: {{.*}} [[A_SRC:".*a.cu"]] -// CHECK: [[CLANG]] "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHECK: [[CLANG]] "-cc1" "-triple" [[HOST]] // CHECK-SAME: "-aux-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-emit-obj" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" @@ -36,7 +53,7 @@ // generate image for device side path on gfx803 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" -// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" [[HOST:"x86_64-[^"]+"]] // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" @@ -48,7 +65,7 @@ // CHECK-SAME: {{.*}} [[A_SRC]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" -// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" [[HOST]] // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" // CHECK-SAME: "-fcuda-is-device" "-fcuda-allow-variadic-functions" "-fvisibility" "hidden" @@ -68,7 +85,7 @@ // generate image for device side path on gfx900 // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" -// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" [[HOST]] // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" // CHECK-SAME: "-fcuda-is-device" @@ -78,7 +95,7 @@ // CHECK-SAME: {{.*}} [[A_SRC]] // CHECK: [[CLANG]] "-cc1" "-triple" "amdgcn-amd-amdhsa" -// CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" +// CHECK-SAME: "-aux-triple" [[HOST]] // CHECK-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "b.hip" // CHECK-SAME: "-fcuda-is-device" @@ -99,9 +116,10 @@ // CHECK: [[BUNDLER:".*clang-offload-bundler"]] "-type=o" // CHECK-SAME: "-bundle-align=4096" // CHECK-SAME: "-targets={{.*}},hipv4-amdgcn-amd-amdhsa--gfx803,hipv4-amdgcn-amd-amdhsa--gfx900" -// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE:.*hipfb]]" +// CHECK-SAME: "-inputs={{.*}},[[IMG_DEV1]],[[IMG_DEV2]]" "-outputs=[[BUNDLE]]" -// CHECK: [[MC:".*llvm-mc.*"]] "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin" "--filetype=obj" +// CHECK: [[MC:".*llvm-mc.*"]] "-triple" [[HOST]] "-o" [[OBJBUNDLE:".*o"]] "{{.*}}.mcin" "--filetype=obj" // output the executable -// CHECK: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]] +// LNX: [[LD:".*ld.*"]] {{.*}}"-o" "a.out" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]] +// MSVC: [[LD:".*lld-link.*"]] {{.*}}"-out:a.exe" {{.*}} [[A_OBJ_HOST]] [[B_OBJ_HOST]] [[OBJBUNDLE]]