diff --git a/clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu b/clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -target-cpu gfx90a -Rpass=atomic-expand -S -o - 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GFX90A-CAS + +// REQUIRES: amdgpu-registered-target + +#include "Inputs/cuda.h" +#include + +// GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at system memory scope +// GFX90A-CAS-LABEL: _Z14atomic_add_casPf +// GFX90A-CAS: flat_atomic_cmpswap v0, v[2:3], v[4:5] glc +// GFX90A-CAS: s_cbranch_execnz +__device__ float atomic_add_cas(float *p) { + return __atomic_fetch_add(p, 1.0f, memory_order_relaxed); +} diff --git a/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl b/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \ +// RUN: -Rpass=atomic-expand -S -o - 2>&1 | \ +// RUN: FileCheck %s --check-prefix=REMARK + +// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \ +// RUN: -Rpass=atomic-expand -S -emit-llvm -o - 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GFX90A-CAS + +// REQUIRES: amdgpu-registered-target + +typedef enum memory_order { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +#endif +} memory_scope; + +// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at workgroup-one-as memory scope [-Rpass=atomic-expand] +// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at agent-one-as memory scope [-Rpass=atomic-expand] +// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at one-as memory scope [-Rpass=atomic-expand] +// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at wavefront-one-as memory scope [-Rpass=atomic-expand] +// GFX90A-CAS-LABEL: @atomic_cas +// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic +// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("agent-one-as") monotonic +// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("one-as") monotonic +// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("wavefront-one-as") monotonic +float atomic_cas(__global atomic_float *d, float a) { + float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); + float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_device); + float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_all_svm_devices); + float ret4 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_sub_group); +} + + + diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/TargetLowering.h" @@ -58,6 +59,7 @@ class AtomicExpand: public FunctionPass { const TargetLowering *TLI = nullptr; + std::unique_ptr ORE; public: static char ID; // Pass identification, replacement for typeid @@ -170,6 +172,7 @@ if (!TPC) return false; + ORE = std::make_unique(&F); auto &TM = TPC->getTM(); if (!TM.getSubtargetImpl(F)->enableAtomicExpand()) return false; @@ -570,7 +573,9 @@ } bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { - switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + LLVMContext &Ctx = AI->getModule()->getContext(); + TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI); + switch (Kind) { case TargetLoweringBase::AtomicExpansionKind::None: return false; case TargetLoweringBase::AtomicExpansionKind::LLSC: { @@ -600,6 +605,17 @@ expandPartwordAtomicRMW(AI, TargetLoweringBase::AtomicExpansionKind::CmpXChg); } else { + SmallVector SSNs; + Ctx.getSyncScopeNames(SSNs); + auto MemScope = SSNs[AI->getSyncScopeID()].empty() + ? "system" + : SSNs[AI->getSyncScopeID()]; + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Passed", AI->getFunction()) + << "A compare and swap loop was generated for an atomic " + << AI->getOperationName(AI->getOperation()) << " operation at " + << MemScope << " memory scope"; + }); expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); } return true; diff --git a/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs --pass-remarks=atomic-expand \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=GFX90A-CAS + +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at system memory scope +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at agent memory scope +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at workgroup memory scope +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at wavefront memory scope +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at singlethread memory scope +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at one-as memory scope +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at agent-one-as memory scope +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at workgroup-one-as memory scope +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at wavefront-one-as memory scope +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at singlethread-one-as memory scope + +; GFX90A-CAS-LABEL: atomic_add_cas: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q monotonic, align 4 + ret void +} + +; GFX90A-CAS-LABEL: atomic_add_cas_agent: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas_agent(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q syncscope("agent") monotonic, align 4 + ret void +} + +; GFX90A-CAS-LABEL: atomic_add_cas_workgroup: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas_workgroup(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q syncscope("workgroup") monotonic, align 4 + ret void +} + +; GFX90A-CAS-LABEL: atomic_add_cas_wavefront: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas_wavefront(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q syncscope("wavefront") monotonic, align 4 + ret void +} + +; GFX90A-CAS-LABEL: atomic_add_cas_singlethread: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas_singlethread(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q syncscope("singlethread") monotonic, align 4 + ret void +} + +; GFX90A-CAS-LABEL: atomic_add_cas_one_as: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas_one_as(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q syncscope("one-as") monotonic, align 4 + ret void +} + +; GFX90A-CAS-LABEL: atomic_add_cas_agent_one_as: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas_agent_one_as(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q syncscope("agent-one-as") monotonic, align 4 + ret void +} + +; GFX90A-CAS-LABEL: atomic_add_cas_workgroup_one_as: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas_workgroup_one_as(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q syncscope("workgroup-one-as") monotonic, align 4 + ret void +} + +; GFX90A-CAS-LABEL: atomic_add_cas_wavefront_one_as: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas_wavefront_one_as(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q syncscope("wavefront-one-as") monotonic, align 4 + ret void +} + +; GFX90A-CAS-LABEL: atomic_add_cas_singlethread_one_as: +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc +; GFX90A-CAS: s_cbranch_execnz +define dso_local void @atomic_add_cas_singlethread_one_as(float* %p, float %q) { +entry: + %ret = atomicrmw fadd float* %p, float %q syncscope("singlethread-one-as") monotonic, align 4 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll --- a/llvm/test/CodeGen/PowerPC/O3-pipeline.ll +++ b/llvm/test/CodeGen/PowerPC/O3-pipeline.ll @@ -206,4 +206,4 @@ define void @f() { ret void -} \ No newline at end of file +}