diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -252,7 +252,7 @@ std::string BitcodeSuffix; if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime, options::OPT_fno_openmp_target_new_runtime, false)) - BitcodeSuffix = "new-amdgcn-" + GPUArch; + BitcodeSuffix = "new-amdgpu-" + GPUArch; else BitcodeSuffix = "amdgcn-" + GPUArch; diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -29,7 +29,7 @@ find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) find_program(OPT_TOOL opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH) - libomptarget_say("Building Device RTL. Using clang: ${CLANG_TOOL}") + libomptarget_say("Building DeviceRTL. Using clang: ${CLANG_TOOL}") elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDALONE_BUILD) # LLVM in-tree builds may use CMake target names to discover the tools. set(CLANG_TOOL $) @@ -63,7 +63,7 @@ set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80) set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${all_capabilities} CACHE STRING - "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.") + "List of CUDA Compute Capabilities to be used to compile the NVPTX DeviceRTL.") string(TOLOWER ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES} LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES) if (LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES STREQUAL "all") @@ -80,7 +80,7 @@ # If user set LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES to empty, we disable the # build. 
if (NOT nvptx_sm_list) - libomptarget_say("Not building CUDA offloading device RTL: empty compute capability list") + libomptarget_say("Not building CUDA offloading DeviceRTL: empty compute capability list") return() endif() @@ -91,6 +91,12 @@ endif() endforeach() +set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx1010 gfx1030 gfx1031) +if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST) + set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST}) +endif() + + # Activate RTL message dumps if requested by the user. set(LIBOMPTARGET_DEVICE_DEBUG FALSE CACHE BOOL "Activate DeviceRTL debug messages.") @@ -121,11 +127,9 @@ # Set flags for LLVM Bitcode compilation. set(bc_flags -S -x c++ -std=c++17 ${clang_opt_flags} - -target nvptx64 -Xclang -emit-llvm-bc -Xclang -aux-triple -Xclang ${aux_triple} -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device - -Xclang -target-feature -Xclang +ptx61 -I${include_directory} -I${devicertl_base_directory}/../include ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL} @@ -137,23 +141,21 @@ list(APPEND bc_flags -DOMPTARGET_DEBUG=0) endif() -# Create target to build all Bitcode libraries. -add_custom_target(omptarget-new-nvptx-bc) -add_dependencies(omptarget-new-nvptx-bc opt llvm-link) +function(compileDeviceRTLLibrary target_cpu target_name) + set(target_bc_flags ${ARGN}) -# Generate a Bitcode library for all the compute capabilities the user requested -foreach(sm ${nvptx_sm_list}) - # TODO: replace this with declare variant and isa selector. 
- set(cuda_flags -Xclang -target-cpu -Xclang sm_${sm} "-D__CUDA_ARCH__=${sm}0") set(bc_files "") foreach(src ${src_files}) get_filename_component(infile ${src} ABSOLUTE) get_filename_component(outfile ${src} NAME) - set(outfile "${outfile}-sm_${sm}.bc") + set(outfile "${outfile}-${target_cpu}.bc") add_custom_command(OUTPUT ${outfile} - COMMAND ${CLANG_TOOL} ${bc_flags} - ${cuda_flags} ${infile} -o ${outfile} + COMMAND ${CLANG_TOOL} + ${bc_flags} + -Xclang -target-cpu -Xclang ${target_cpu} + ${target_bc_flags} + ${infile} -o ${outfile} DEPENDS ${infile} IMPLICIT_DEPENDS CXX ${infile} COMMENT "Building LLVM bitcode ${outfile}" @@ -173,43 +175,41 @@ list(APPEND bc_files ${outfile}) endforeach() - set(bclib_name "libomptarget-new-nvptx-sm_${sm}.bc") + set(bclib_name "libomptarget-new-${target_name}-${target_cpu}.bc") # Link to a bitcode library. - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name} COMMAND ${LINK_TOOL} - -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} ${bc_files} + -o ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name} ${bc_files} DEPENDS ${bc_files} COMMENT "Linking LLVM bitcode ${bclib_name}" ) - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt - COMMAND ${OPT_TOOL} ${link_opt_flags} ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} + COMMAND ${OPT_TOOL} ${link_opt_flags} ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name} -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name} COMMENT "Optimizing LLVM bitcode ${bclib_name}" ) # Add a file-level dependency to ensure that llvm-link and opt are up-to-date. 
# By default, add_custom_command only builds the tool if the executable is missing if("${LINK_TOOL}" STREQUAL "$") - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name} DEPENDS llvm-link APPEND) endif() if("${OPT_TOOL}" STREQUAL "$") - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DEPENDS opt APPEND) endif() set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name}) - set(bclib_target_name "omptarget-new-nvptx-sm_${sm}-bc") + set(bclib_target_name "omptarget-new-${target_name}-${target_cpu}-bc") - add_custom_target(${bclib_target_name} ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt) - add_dependencies(omptarget-new-nvptx-bc ${bclib_target_name}) - add_dependencies(${bclib_target_name} opt llvm-link) + add_custom_target(${bclib_target_name} ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}) # Copy library to destination. add_custom_command(TARGET ${bclib_target_name} POST_BUILD @@ -218,4 +218,13 @@ # Install bitcode library under the lib destination folder. 
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OPENMP_INSTALL_LIBDIR}") +endfunction() + +# Generate a Bitcode library for all the compute capabilities the user requested +foreach(sm ${nvptx_sm_list}) + compileDeviceRTLLibrary(sm_${sm} nvptx -target nvptx64 -Xclang -target-feature -Xclang +ptx61 "-D__CUDA_ARCH__=${sm}0") +endforeach() + +foreach(mcpu ${amdgpu_mcpus}) + compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa "-D__AMDGCN__" -fvisibility=default -nogpulib) endforeach() diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp --- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp @@ -20,14 +20,14 @@ #pragma omp declare target -extern uint32_t __omp_rtl_debug_kind; +// extern uint32_t __omp_rtl_debug_kind; // TOOD: We want to change the name as soon as the old runtime is gone. DeviceEnvironmentTy CONSTANT(omptarget_device_environment) __attribute__((used)); uint32_t config::getDebugKind() { - return __omp_rtl_debug_kind & omptarget_device_environment.DebugKind; + return /*__omp_rtl_debug_kind &*/ omptarget_device_environment.DebugKind; } uint32_t config::getNumDevices() { diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -68,8 +68,23 @@ ///{ #pragma omp begin declare variant match(device = {arch(amdgcn)}) -uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) { - return __builtin_amdgcn_atomic_inc32(Address, Val, Ordering, ""); +uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) { + // builtin_amdgcn_atomic_inc32 should expand to this switch when + // passed a runtime value, but does not do so yet. Workaround here. 
+ switch (Ordering) { + default: + __builtin_unreachable(); + case __ATOMIC_RELAXED: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, ""); + case __ATOMIC_ACQUIRE: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, ""); + case __ATOMIC_RELEASE: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, ""); + case __ATOMIC_ACQ_REL: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, ""); + case __ATOMIC_SEQ_CST: + return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, ""); + } } uint32_t SHARED(namedBarrierTracker); @@ -126,17 +141,64 @@ fence::team(__ATOMIC_RELEASE); } +// sema checking of amdgcn_fence is aggressive. Intention is to patch clang +// so that it is usable within a template environment and so that a runtime +// value of the memory order is expanded to this switch within clang/llvm. +void fenceTeam(int Ordering) { + switch (Ordering) { + default: + __builtin_unreachable(); + case __ATOMIC_ACQUIRE: + return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup"); + case __ATOMIC_RELEASE: + return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup"); + case __ATOMIC_ACQ_REL: + return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup"); + case __ATOMIC_SEQ_CST: + return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup"); + } +} +void fenceKernel(int Ordering) { + switch (Ordering) { + default: + __builtin_unreachable(); + case __ATOMIC_ACQUIRE: + return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent"); + case __ATOMIC_RELEASE: + return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent"); + case __ATOMIC_ACQ_REL: + return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent"); + case __ATOMIC_SEQ_CST: + return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent"); + } +} +void fenceSystem(int Ordering) { + switch (Ordering) { + default: + __builtin_unreachable(); + case __ATOMIC_ACQUIRE: + return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ""); + case __ATOMIC_RELEASE: + return __builtin_amdgcn_fence(__ATOMIC_RELEASE, 
""); + case __ATOMIC_ACQ_REL: + return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, ""); + case __ATOMIC_SEQ_CST: + return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, ""); + } +} + void syncWarp(__kmpc_impl_lanemask_t) { // AMDGCN doesn't need to sync threads in a warp } void syncThreads() { __builtin_amdgcn_s_barrier(); } -void fenceTeam(int Ordering) { __builtin_amdgcn_fence(Ordering, "workgroup"); } - -void fenceKernel(int Ordering) { __builtin_amdgcn_fence(Ordering, "agent"); } - -void fenceSystem(int Ordering) { __builtin_amdgcn_fence(Ordering, ""); } +// TODO: Don't have wavefront lane locks. Possibly can't have them. +void unsetLock(omp_lock_t *) { __builtin_trap(); } +int testLock(omp_lock_t *) { __builtin_trap(); } +void initLock(omp_lock_t *) { __builtin_trap(); } +void destroyLock(omp_lock_t *) { __builtin_trap(); } +void setLock(omp_lock_t *) { __builtin_trap(); } #pragma omp end declare variant ///} @@ -238,7 +300,7 @@ } void atomic::store(uint32_t *Addr, uint32_t V, int Ordering) { - impl::atomicStore(Addr, V, Ordering); + impl::atomicStore(Addr, V, Ordering); } uint32_t atomic::inc(uint32_t *Addr, uint32_t V, int Ordering) { diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt --- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt +++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt @@ -122,3 +122,4 @@ # Report to the parent scope that we are building a plugin for amdgpu -set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE) +# Append both test targets in one set(): PARENT_SCOPE leaves the local variable untouched, so a second set() would overwrite the first. +set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa amdgcn-amd-amdhsa-newRTL" PARENT_SCOPE) diff --git a/openmp/libomptarget/test/mapping/data_member_ref.cpp b/openmp/libomptarget/test/mapping/data_member_ref.cpp --- a/openmp/libomptarget/test/mapping/data_member_ref.cpp +++ b/openmp/libomptarget/test/mapping/data_member_ref.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// 
XFAIL: amdgcn-amd-amdhsa-newRTL #include diff --git a/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp b/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp --- a/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp +++ b/openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp b/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp --- a/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp +++ b/openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/delete_inf_refcount.c b/openmp/libomptarget/test/mapping/delete_inf_refcount.c --- a/openmp/libomptarget/test/mapping/delete_inf_refcount.c +++ b/openmp/libomptarget/test/mapping/delete_inf_refcount.c @@ -2,6 +2,7 @@ // fails with error message 'Unable to generate target entries' on amdgcn // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/lambda_by_value.cpp b/openmp/libomptarget/test/mapping/lambda_by_value.cpp --- a/openmp/libomptarget/test/mapping/lambda_by_value.cpp +++ b/openmp/libomptarget/test/mapping/lambda_by_value.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/ompx_hold/struct.c b/openmp/libomptarget/test/mapping/ompx_hold/struct.c --- a/openmp/libomptarget/test/mapping/ompx_hold/struct.c +++ b/openmp/libomptarget/test/mapping/ompx_hold/struct.c @@ -3,6 +3,7 @@ // 
amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/mapping/ptr_and_obj_motion.c b/openmp/libomptarget/test/mapping/ptr_and_obj_motion.c --- a/openmp/libomptarget/test/mapping/ptr_and_obj_motion.c +++ b/openmp/libomptarget/test/mapping/ptr_and_obj_motion.c @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include diff --git a/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp b/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp --- a/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp +++ b/openmp/libomptarget/test/mapping/reduction_implicit_map.cpp @@ -2,6 +2,7 @@ // amdgcn does not have printf definition // UNSUPPORTED: amdgcn-amd-amdhsa +// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL #include diff --git a/openmp/libomptarget/test/offloading/bug49021.cpp b/openmp/libomptarget/test/offloading/bug49021.cpp --- a/openmp/libomptarget/test/offloading/bug49021.cpp +++ b/openmp/libomptarget/test/offloading/bug49021.cpp @@ -2,6 +2,7 @@ // Wrong results on amdgcn // UNSUPPORTED: amdgcn-amd-amdhsa +// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL #include diff --git a/openmp/libomptarget/test/offloading/bug49334.cpp b/openmp/libomptarget/test/offloading/bug49334.cpp --- a/openmp/libomptarget/test/offloading/bug49334.cpp +++ b/openmp/libomptarget/test/offloading/bug49334.cpp @@ -2,7 +2,7 @@ // Currently hangs on amdgpu // UNSUPPORTED: amdgcn-amd-amdhsa - +// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL // UNSUPPORTED: x86_64-pc-linux-gnu #include diff --git a/openmp/libomptarget/test/offloading/bug50022.cpp b/openmp/libomptarget/test/offloading/bug50022.cpp --- a/openmp/libomptarget/test/offloading/bug50022.cpp +++ b/openmp/libomptarget/test/offloading/bug50022.cpp @@ -1,6 +1,7 @@ // RUN: %libomptarget-compilexx-and-run-generic // UNSUPPORTED: amdgcn-amd-amdhsa +// UNSUPPORTED: 
amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/offloading/global_constructor.cpp b/openmp/libomptarget/test/offloading/global_constructor.cpp --- a/openmp/libomptarget/test/offloading/global_constructor.cpp +++ b/openmp/libomptarget/test/offloading/global_constructor.cpp @@ -2,6 +2,7 @@ // Fails in DAGToDAG on an address space problem // UNSUPPORTED: amdgcn-amd-amdhsa +// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/offloading/host_as_target.c b/openmp/libomptarget/test/offloading/host_as_target.c --- a/openmp/libomptarget/test/offloading/host_as_target.c +++ b/openmp/libomptarget/test/offloading/host_as_target.c @@ -9,6 +9,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/unified_shared_memory/api.c b/openmp/libomptarget/test/unified_shared_memory/api.c --- a/openmp/libomptarget/test/unified_shared_memory/api.c +++ b/openmp/libomptarget/test/unified_shared_memory/api.c @@ -4,6 +4,7 @@ // Fails on amdgcn with error: GPU Memory Error // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c b/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c --- a/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c +++ b/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c @@ -5,6 +5,7 @@ // Fails on amdgcn with error: GPU Memory Error // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/unified_shared_memory/close_modifier.c b/openmp/libomptarget/test/unified_shared_memory/close_modifier.c --- a/openmp/libomptarget/test/unified_shared_memory/close_modifier.c +++ b/openmp/libomptarget/test/unified_shared_memory/close_modifier.c @@ -5,6 +5,7 @@ // amdgcn does not have printf 
definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include diff --git a/openmp/libomptarget/test/unified_shared_memory/shared_update.c b/openmp/libomptarget/test/unified_shared_memory/shared_update.c --- a/openmp/libomptarget/test/unified_shared_memory/shared_update.c +++ b/openmp/libomptarget/test/unified_shared_memory/shared_update.c @@ -4,6 +4,7 @@ // amdgcn does not have printf definition // XFAIL: amdgcn-amd-amdhsa +// XFAIL: amdgcn-amd-amdhsa-newRTL #include #include