diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -1127,8 +1127,13 @@ llvm::OpenMPIRBuilder OMPBuilder(CGM.getModule()); OMPBuilder.initialize(); - StringRef Section = CGM.getLangOpts().HIP ? "hip_offloading_entries" - : "cuda_offloading_entries"; + StringRef Section; + if (CGM.getTriple().isOSBinFormatCOFF()) + Section = CGM.getLangOpts().HIP ? ".hip$OE" : ".cuda$OE"; + else + Section = CGM.getLangOpts().HIP ? "hip_offloading_entries" + : "cuda_offloading_entries"; + for (KernelInfo &I : EmittedKernels) OMPBuilder.emitOffloadingEntry(KernelHandles[I.Kernel], getDeviceSideName(cast(I.D)), 0, diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu --- a/clang/test/CodeGenCUDA/offloading-entries.cu +++ b/clang/test/CodeGenCUDA/offloading-entries.cu @@ -5,6 +5,12 @@ // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \ // RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \ // RUN: --check-prefix=HIP %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-win32-gnu -fgpu-rdc \ +// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \ +// RUN: --check-prefix=CUDA-COFF %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-win32-gnu -fgpu-rdc \ +// RUN: --offload-new-driver -emit-llvm -o - -x hip %s | FileCheck \ +// RUN: --check-prefix=HIP-COFF %s #include "Inputs/cuda.h" @@ -23,6 +29,20 @@ // HIP: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00" // HIP: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1 //. +// CUDA-COFF: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// CUDA-COFF: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section ".cuda$OE", align 1 +// CUDA-COFF: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00" +// CUDA-COFF: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section ".cuda$OE", align 1 +// CUDA-COFF: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00" +// CUDA-COFF: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section ".cuda$OE", align 1 +//. +// HIP-COFF: @.omp_offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// HIP-COFF: @.omp_offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.omp_offloading.entry_name, i64 0, i32 0, i32 0 }, section ".hip$OE", align 1 +// HIP-COFF: @.omp_offloading.entry_name.1 = internal unnamed_addr constant [8 x i8] c"_Z3barv\00" +// HIP-COFF: @.omp_offloading.entry._Z3barv = weak constant %struct.__tgt_offload_entry { ptr @_Z3barv, ptr @.omp_offloading.entry_name.1, i64 0, i32 0, i32 0 }, section ".hip$OE", align 1 +// HIP-COFF: @.omp_offloading.entry_name.2 = internal unnamed_addr constant [2 x i8] c"x\00" +// HIP-COFF: @.omp_offloading.entry.x = weak constant %struct.__tgt_offload_entry { ptr @x, ptr @.omp_offloading.entry_name.2, i64 4, i32 0, i32 0 }, section ".hip$OE", align 1 +//. // CUDA-LABEL: @_Z18__device_stub__foov( // CUDA-NEXT: entry: // CUDA-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov) @@ -37,6 +57,20 @@ // HIP: setup.end: // HIP-NEXT: ret void // +// CUDA-COFF-LABEL: @_Z18__device_stub__foov( +// CUDA-COFF-NEXT: entry: +// CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__foov) +// CUDA-COFF-NEXT: br label [[SETUP_END:%.*]] +// CUDA-COFF: setup.end: +// CUDA-COFF-NEXT: ret void +// +// HIP-COFF-LABEL: @_Z18__device_stub__foov( +// HIP-COFF-NEXT: entry: +// HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3foov) +// HIP-COFF-NEXT: br label [[SETUP_END:%.*]] +// HIP-COFF: setup.end: +// HIP-COFF-NEXT: ret void +// __global__ void foo() {} // CUDA-LABEL: @_Z18__device_stub__barv( @@ -53,5 +87,19 @@ // HIP: setup.end: // HIP-NEXT: ret void // +// CUDA-COFF-LABEL: @_Z18__device_stub__barv( +// CUDA-COFF-NEXT: entry: +// CUDA-COFF-NEXT: [[TMP0:%.*]] = call i32 @cudaLaunch(ptr @_Z18__device_stub__barv) +// CUDA-COFF-NEXT: br label [[SETUP_END:%.*]] +// CUDA-COFF: setup.end: +// CUDA-COFF-NEXT: ret void +// +// HIP-COFF-LABEL: @_Z18__device_stub__barv( +// HIP-COFF-NEXT: entry: +// HIP-COFF-NEXT: [[TMP0:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z3barv) +// HIP-COFF-NEXT: br label [[SETUP_END:%.*]] +// HIP-COFF: setup.end: +// HIP-COFF-NEXT: ret void +// __global__ void bar() {} __device__ int x = 1; diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c --- a/clang/test/Driver/linker-wrapper-image.c +++ b/clang/test/Driver/linker-wrapper-image.c @@ -6,16 +6,26 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=OPENMP - -// OPENMP: @__start_omp_offloading_entries = external hidden constant %__tgt_offload_entry -// OPENMP-NEXT: @__stop_omp_offloading_entries = external hidden constant %__tgt_offload_entry -// OPENMP-NEXT: @__dummy.omp_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries" -// OPENMP-NEXT: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}" -// OPENMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr @.omp_offloading.device_image, ptr getelementptr inbounds ([[[SIZE]] x i8], ptr @.omp_offloading.device_image, i64 1, i64 0), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }] -// OPENMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries } -// OPENMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_reg, ptr null }] -// OPENMP-NEXT: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_unreg, ptr null }] +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-ELF +// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=OPENMP,OPENMP-COFF + +// OPENMP-ELF: @__start_omp_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] +// OPENMP-ELF-NEXT: @__stop_omp_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] +// OPENMP-ELF-NEXT: @__dummy.omp_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "omp_offloading_entries" +// OPENMP-ELF-NEXT: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}" +// OPENMP-ELF-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr @.omp_offloading.device_image, ptr getelementptr inbounds ([[[SIZE]] x i8], ptr @.omp_offloading.device_image, i64 1, i64 0), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }] +// OPENMP-ELF-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries } +// OPENMP-ELF-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_reg, ptr null }] +// OPENMP-ELF-NEXT: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_unreg, ptr null }] + +// OPENMP-COFF: @__start.omp_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section ".omp$OA" +// OPENMP-COFF-NEXT: @__stop.omp_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section ".omp$OZ" +// OPENMP-COFF-NEXT: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}" +// OPENMP-COFF-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr @.omp_offloading.device_image, ptr getelementptr inbounds ([[[SIZE]] x i8], ptr @.omp_offloading.device_image, i64 1, i64 0), ptr getelementptr inbounds ([0 x %__tgt_offload_entry], ptr @__start.omp_offloading.entry, i64 0, i64 1), ptr @__stop.omp_offloading.entry }] +// OPENMP-COFF-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr getelementptr inbounds ([0 x %__tgt_offload_entry], ptr @__start.omp_offloading.entry, i64 0, i64 1), ptr @__stop.omp_offloading.entry } +// OPENMP-COFF-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_reg, ptr null }] +// OPENMP-COFF-NEXT: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_unreg, ptr null }] // OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" { // OPENMP-NEXT: entry: @@ -33,15 +43,24 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA - -// CUDA: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".nv_fatbin" -// CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8 -// CUDA-NEXT: @__dummy.cuda_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries" -// CUDA-NEXT: @.cuda.binary_handle = internal global ptr null -// CUDA-NEXT: @__start_cuda_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] -// CUDA-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] -// CUDA-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.cuda.fatbin_reg, ptr null }] +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-ELF +// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=CUDA,CUDA-COFF + +// CUDA-ELF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".nv_fatbin" +// CUDA-ELF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8 +// CUDA-ELF-NEXT: @.cuda.binary_handle = internal global ptr null +// CUDA-ELF-NEXT: @__start_cuda_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] +// CUDA-ELF-NEXT: @__stop_cuda_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] +// CUDA-ELF-NEXT: @__dummy.cuda_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "cuda_offloading_entries" +// CUDA-ELF-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.cuda.fatbin_reg, ptr null }] + +// CUDA-COFF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".nv_fatbin" +// CUDA-COFF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8 +// CUDA-COFF-NEXT: @.cuda.binary_handle = internal global ptr null +// CUDA-COFF-NEXT: @__start.cuda_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section ".cuda$OA" +// CUDA-COFF-NEXT: @__stop.cuda_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section ".cuda$OZ" +// CUDA-COFF-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.cuda.fatbin_reg, ptr null }] // CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" { // CUDA-NEXT: entry: @@ -62,10 +81,10 @@ // CUDA: define internal void @.cuda.globals_reg(ptr %0) section ".text.startup" { // CUDA-NEXT: entry: -// CUDA-NEXT: br i1 icmp ne (ptr @__start_cuda_offloading_entries, ptr @__stop_cuda_offloading_entries), label %while.entry, label %while.end +// CUDA-NEXT: br i1 icmp ne (ptr [[START_ENTRIES:.+]], ptr [[STOP_ENTRIES:.+]]), label %while.entry, label %while.end // CUDA: while.entry: -// CUDA-NEXT: %entry1 = phi ptr [ @__start_cuda_offloading_entries, %entry ], [ %7, %if.end ] +// CUDA-NEXT: %entry1 = phi ptr [ [[START_ENTRIES]], %entry ], [ %7, %if.end ] // CUDA-NEXT: %1 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 0 // CUDA-NEXT: %addr = load ptr, ptr %1, align 8 // CUDA-NEXT: %2 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 1 @@ -104,7 +123,7 @@ // CUDA: if.end: // CUDA-NEXT: %7 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 1 -// CUDA-NEXT: %8 = icmp eq ptr %7, @__stop_cuda_offloading_entries +// CUDA-NEXT: %8 = icmp eq ptr %7, [[STOP_ENTRIES]] // CUDA-NEXT: br i1 %8, label %while.end, label %while.entry // CUDA: while.end: @@ -115,15 +134,24 @@ // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o \ // RUN: -fembed-offload-object=%t.out // RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-linux-gnu \ -// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP - -// HIP: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" -// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 -// HIP-NEXT: @__dummy.hip_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries" -// HIP-NEXT: @.hip.binary_handle = internal global ptr null -// HIP-NEXT: @__start_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] -// HIP-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] -// HIP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }] +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-ELF +// RUN: clang-linker-wrapper --print-wrapped-module --dry-run --host-triple=x86_64-unknown-windows-gnu \ +// RUN: --linker-path=/usr/bin/ld -- %t.o -o a.out 2>&1 | FileCheck %s --check-prefixes=HIP,HIP-COFF + +// HIP-ELF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" +// HIP-ELF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 +// HIP-ELF-NEXT: @.hip.binary_handle = internal global ptr null +// HIP-ELF-NEXT: @__start_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] +// HIP-ELF-NEXT: @__stop_hip_offloading_entries = external hidden constant [0 x %__tgt_offload_entry] +// HIP-ELF-NEXT: @__dummy.hip_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section "hip_offloading_entries" +// HIP-ELF-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }] + +// HIP-COFF: @.fatbin_image = internal constant [0 x i8] zeroinitializer, section ".hip_fatbin" +// HIP-COFF-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8 +// HIP-COFF-NEXT: @.hip.binary_handle = internal global ptr null +// HIP-COFF-NEXT: @__start.hip_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section ".hip$OA" +// HIP-COFF-NEXT: @__stop.hip_offloading.entry = hidden constant [0 x %__tgt_offload_entry] zeroinitializer, section ".hip$OZ" +// HIP-COFF-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }] // HIP: define internal void @.hip.fatbin_reg() section ".text.startup" { // HIP-NEXT: entry: @@ -143,10 +171,10 @@ // HIP: define internal void @.hip.globals_reg(ptr %0) section ".text.startup" { // HIP-NEXT: entry: -// HIP-NEXT: br i1 icmp ne (ptr @__start_hip_offloading_entries, ptr @__stop_hip_offloading_entries), label %while.entry, label %while.end +// HIP-NEXT: br i1 icmp ne (ptr [[START_ENTRIES:.+]], ptr [[STOP_ENTRIES:.+]]), label %while.entry, label %while.end // HIP: while.entry: -// HIP-NEXT: %entry1 = phi ptr [ @__start_hip_offloading_entries, %entry ], [ %7, %if.end ] +// HIP-NEXT: %entry1 = phi ptr [ [[START_ENTRIES]], %entry ], [ %7, %if.end ] // HIP-NEXT: %1 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 0 // HIP-NEXT: %addr = load ptr, ptr %1, align 8 // HIP-NEXT: %2 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 0, i32 1 @@ -185,7 +213,7 @@ // HIP: if.end: // HIP-NEXT: %7 = getelementptr inbounds %__tgt_offload_entry, ptr %entry1, i64 1 -// HIP-NEXT: %8 = icmp eq ptr %7, @__stop_hip_offloading_entries +// HIP-NEXT: %8 = icmp eq ptr %7, [[STOP_ENTRIES]] // HIP-NEXT: br i1 %8, label %while.end, label %while.entry // HIP: while.end: diff --git a/clang/test/OpenMP/declare_target_link_codegen.cpp b/clang/test/OpenMP/declare_target_link_codegen.cpp --- a/clang/test/OpenMP/declare_target_link_codegen.cpp +++ b/clang/test/OpenMP/declare_target_link_codegen.cpp @@ -10,6 +10,8 @@ // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -o - | FileCheck %s --check-prefix SIMD-ONLY +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-win32-gnu -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix HOST-COFF --check-prefix CHECK + // expected-no-diagnostics // SIMD-ONLY-NOT: {{__kmpc|__tgt}} @@ -27,6 +29,7 @@ // HOST: [[MAPTYPES:@.+]] = private unnamed_addr constant [3 x i64] [i64 35, i64 531, i64 531] // HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_decl_tgt_ref_ptr\00" // HOST: @.omp_offloading.entry.c_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { i8* bitcast (i32** @c_decl_tgt_ref_ptr to i8*), i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i64 8, i32 1, i32 0 }, section "omp_offloading_entries", align 1 +// HOST-COFF: @.omp_offloading.entry.c_decl_tgt_ref_ptr = weak constant %struct.__tgt_offload_entry { i8* bitcast (i32** @c_decl_tgt_ref_ptr to i8*), i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i64 8, i32 1, i32 0 }, section ".omp$OE", align 1 // DEVICE-NOT: internal unnamed_addr constant [{{[0-9]+}} x i8] c"c_{{.*}}_decl_tgt_ref_ptr\00" // HOST: @.omp_offloading.entry_name{{.*}} = internal unnamed_addr constant [{{[0-9]+}} x i8] c"_{{.*}}d_{{.*}}_decl_tgt_ref_ptr\00" // HOST: @.omp_offloading.entry.[[D_PTR]] = weak constant %struct.__tgt_offload_entry { i8* bitcast (i32** @[[D_PTR]] to i8*), i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @.omp_offloading.entry_name{{.*}}, i32 0, i32 0 @@ -50,7 +53,7 @@ return 0; } -// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}} +// DEVICE: define weak_odr protected void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l45(i32* noundef nonnull align {{[0-9]+}} dereferenceable{{[^,]*}} // DEVICE: [[C_REF:%.+]] = load i32*, i32** @c_decl_tgt_ref_ptr, // DEVICE: [[C:%.+]] = load i32, i32* [[C_REF]], // DEVICE: store i32 [[C]], i32* % @@ -78,10 +81,10 @@ // HOST: [[BP0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASEPTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // HOST: [[P0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 // HOST: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* %{{.+}}) -// HOST: call void @__omp_offloading_{{.*}}_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* %{{[^,]+}}) +// HOST: call void @__omp_offloading_{{.*}}_{{.*}}_{{.*}}maini1{{.*}}_l45(i32* %{{[^,]+}}) // HOST: call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 0, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* %{{.+}}) -// HOST: define internal void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l42(i32* noundef nonnull align {{[0-9]+}} dereferenceable{{.*}}) +// HOST: define internal void @__omp_offloading_{{.*}}_{{.*}}maini1{{.*}}_l45(i32* noundef nonnull align {{[0-9]+}} dereferenceable{{.*}}) // HOST: [[C:%.*]] = load i32, i32* @c, // HOST: store i32 [[C]], i32* % diff --git a/clang/tools/clang-linker-wrapper/OffloadWrapper.cpp b/clang/tools/clang-linker-wrapper/OffloadWrapper.cpp --- a/clang/tools/clang-linker-wrapper/OffloadWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/OffloadWrapper.cpp @@ -110,6 +110,64 @@ return PointerType::getUnqual(getBinDescTy(M)); } +std::pair getELFEntriesArray(Module &M, + StringRef Kind) { + auto *EntriesB = new GlobalVariable( + M, ArrayType::get(getEntryTy(M), 0), /*isConstant*/ true, + GlobalValue::ExternalLinkage, + /*Initializer*/ nullptr, "__start_" + Kind + "_offloading_entries"); + EntriesB->setVisibility(GlobalValue::HiddenVisibility); + auto *EntriesE = new GlobalVariable( + M, ArrayType::get(getEntryTy(M), 0), /*isConstant*/ true, + GlobalValue::ExternalLinkage, + /*Initializer*/ nullptr, "__stop_" + Kind + "_offloading_entries"); + EntriesE->setVisibility(GlobalValue::HiddenVisibility); + + // We assume that external begin/end symbols that we have created above will + // be defined by the linker. But linker will do that only if linker inputs + // have section with "omp_offloading_entries" name which is not guaranteed. + // So, we just create dummy zero sized object in the offload entries section + // to force linker to define those symbols. + auto *DummyInit = + ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); + auto *DummyEntry = new GlobalVariable( + M, DummyInit->getType(), true, GlobalVariable::ExternalLinkage, DummyInit, + "__dummy." + Kind + "_offloading.entry"); + DummyEntry->setSection((Kind + "_offloading_entries").str()); + DummyEntry->setVisibility(GlobalValue::HiddenVisibility); + + return std::make_pair(EntriesB, EntriesE); +} + +std::pair getCOFFEntriesArray(Module &M, + StringRef Kind) { + // For COFF targets, sections with 8 or fewer characters containing a '$' will + // be merged into the same section at runtime. The order is determined by the + // alphebetical ordering of the text after the '$' character. Here we generate + // two dummy variables that will be placed at the start and end of that + // section respectively that can be used to iterate the section at runtime. + auto *EntriesInit = + ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); + auto *EntriesB = + new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0), true, + GlobalVariable::ExternalLinkage, EntriesInit, + "__start." + Kind + "_offloading.entry"); + EntriesB->setSection(("." + Kind + "$OA").str()); + EntriesB->setVisibility(GlobalValue::HiddenVisibility); + auto *EntriesE = + new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0), true, + GlobalVariable::ExternalLinkage, EntriesInit, + "__stop." + Kind + "_offloading.entry"); + EntriesE->setSection(("." + Kind + "$OZ").str()); + EntriesE->setVisibility(GlobalValue::HiddenVisibility); + + Constant *ZeroOne[] = {ConstantInt::get(getSizeTTy(M), 0u), + ConstantInt::get(getSizeTTy(M), 1u)}; + return std::make_pair(ConstantExpr::getGetElementPtr(EntriesB->getValueType(), + EntriesB, ZeroOne), + EntriesE); +} + /// Creates binary descriptor for the given device images. Binary descriptor /// is an object that is passed to the offloading runtime at program startup /// and it describes all device images available in the executable or shared @@ -150,28 +208,13 @@ /// Global variable that represents BinDesc is returned. GlobalVariable *createBinDesc(Module &M, ArrayRef> Bufs) { LLVMContext &C = M.getContext(); - // Create external begin/end symbols for the offload entries table. - auto *EntriesB = new GlobalVariable( - M, getEntryTy(M), /*isConstant*/ true, GlobalValue::ExternalLinkage, - /*Initializer*/ nullptr, "__start_omp_offloading_entries"); - EntriesB->setVisibility(GlobalValue::HiddenVisibility); - auto *EntriesE = new GlobalVariable( - M, getEntryTy(M), /*isConstant*/ true, GlobalValue::ExternalLinkage, - /*Initializer*/ nullptr, "__stop_omp_offloading_entries"); - EntriesE->setVisibility(GlobalValue::HiddenVisibility); + llvm::Triple Triple(M.getTargetTriple()); - // We assume that external begin/end symbols that we have created above will - // be defined by the linker. But linker will do that only if linker inputs - // have section with "omp_offloading_entries" name which is not guaranteed. - // So, we just create dummy zero sized object in the offload entries section - // to force linker to define those symbols. - auto *DummyInit = - ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); - auto *DummyEntry = new GlobalVariable( - M, DummyInit->getType(), true, GlobalVariable::ExternalLinkage, DummyInit, - "__dummy.omp_offloading.entry"); - DummyEntry->setSection("omp_offloading_entries"); - DummyEntry->setVisibility(GlobalValue::HiddenVisibility); + Constant *EntriesB, *EntriesE; + if (Triple.isOSBinFormatCOFF()) + std::tie(EntriesB, EntriesE) = getCOFFEntriesArray(M, "omp"); + else + std::tie(EntriesB, EntriesE) = getELFEntriesArray(M, "omp"); auto *Zero = ConstantInt::get(getSizeTTy(M), 0u); Constant *ZeroZero[] = {Zero, Zero}; @@ -327,18 +370,6 @@ FatbinDesc->setSection(FatbinWrapperSection); FatbinDesc->setAlignment(Align(8)); - // We create a dummy entry to ensure the linker will define the begin / end - // symbols. The CUDA runtime should ignore the null address if we attempt to - // register it. - auto *DummyInit = - ConstantAggregateZero::get(ArrayType::get(getEntryTy(M), 0u)); - auto *DummyEntry = new GlobalVariable( - M, DummyInit->getType(), true, GlobalVariable::ExternalLinkage, DummyInit, - IsHIP ? "__dummy.hip_offloading.entry" : "__dummy.cuda_offloading.entry"); - DummyEntry->setVisibility(GlobalValue::HiddenVisibility); - DummyEntry->setSection(IsHIP ? "hip_offloading_entries" - : "cuda_offloading_entries"); - return FatbinDesc; } @@ -367,6 +398,7 @@ /// } Function *createRegisterGlobalsFunction(Module &M, bool IsHIP) { LLVMContext &C = M.getContext(); + llvm::Triple Triple(M.getTargetTriple()); // Get the __cudaRegisterFunction function declaration. auto *RegFuncTy = FunctionType::get( Type::getInt32Ty(C), @@ -388,21 +420,13 @@ FunctionCallee RegVar = M.getOrInsertFunction( IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy); - // Create the references to the start / stop symbols defined by the linker. - auto *EntriesB = - new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0), - /*isConstant*/ true, GlobalValue::ExternalLinkage, - /*Initializer*/ nullptr, - IsHIP ? "__start_hip_offloading_entries" - : "__start_cuda_offloading_entries"); - EntriesB->setVisibility(GlobalValue::HiddenVisibility); - auto *EntriesE = - new GlobalVariable(M, ArrayType::get(getEntryTy(M), 0), - /*isConstant*/ true, GlobalValue::ExternalLinkage, - /*Initializer*/ nullptr, - IsHIP ? "__stop_hip_offloading_entries" - : "__stop_cuda_offloading_entries"); - EntriesE->setVisibility(GlobalValue::HiddenVisibility); + Constant *EntriesB, *EntriesE; + if (Triple.isOSBinFormatCOFF()) + std::tie(EntriesB, EntriesE) = + getCOFFEntriesArray(M, IsHIP ? "hip" : "cuda"); + else + std::tie(EntriesB, EntriesE) = + getELFEntriesArray(M, IsHIP ? "hip" : "cuda"); auto *RegGlobalsTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C)->getPointerTo(), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -867,8 +867,7 @@ /// \param Flags Flags associated with the entry. /// \param SectionName The section this entry will be placed at. void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size, - int32_t Flags, - StringRef SectionName = "omp_offloading_entries"); + int32_t Flags, StringRef SectionName); /// Generate control flow and cleanup for cancellation. /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4697,7 +4697,10 @@ int32_t Flags, GlobalValue::LinkageTypes) { if (!IsTargetCodegen) { - emitOffloadingEntry(ID, Addr->getName(), Size, Flags); + llvm::Triple Triple(M.getTargetTriple()); + emitOffloadingEntry(ID, Addr->getName(), Size, Flags, + Triple.isOSBinFormatCOFF() ? ".omp$OE" + : "omp_offloading_entries"); return; } // TODO: Add support for global variables on the device after declare target