diff --git a/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl b/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \ +// RUN: -Rpass=si-lower -munsafe-fp-atomics %s -S -o - 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GFX90A-HW-REMARK + +// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \ +// RUN: -Rpass=si-lower -S -emit-llvm -o - 2>&1 | \ +// RUN: FileCheck %s --check-prefix=GFX90A-HW + +// REQUIRES: amdgpu-registered-target + +typedef enum memory_order { + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +#endif +} memory_scope; + +// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope workgroup-one-as due to an unsafe request. [-Rpass=si-lower] +// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope agent-one-as due to an unsafe request. [-Rpass=si-lower] +// GFX90A-HW-REMARK: Hardware instruction generated for atomic fadd operation at memory scope wavefront-one-as due to an unsafe request. [-Rpass=si-lower] +// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc +// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc +// GFX90A-HW-REMARK: global_atomic_add_f32 v0, v[0:1], v2, off glc +// GFX90A-HW-LABEL: @atomic_unsafe_hw +// GFX90A-HW: atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} syncscope("workgroup-one-as") monotonic, align 4 +// GFX90A-HW: atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} syncscope("agent-one-as") monotonic, align 4 +// GFX90A-HW: atomicrmw fadd float addrspace(1)* %{{.*}}, float %{{.*}} syncscope("wavefront-one-as") monotonic, align 4 +void atomic_unsafe_hw(__global atomic_float *d, float a) { + float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); + float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_device); + float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_sub_group); +} + + + diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -19,6 +19,7 @@ #include "SIRegisterInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -12118,6 +12119,26 @@ return DenormMode == DenormalMode::getIEEE(); } +static TargetLowering::AtomicExpansionKind +reportUnsafeHWInst(AtomicRMWInst *RMW, + TargetLowering::AtomicExpansionKind Kind) { + OptimizationRemarkEmitter ORE(RMW->getFunction()); + LLVMContext &Ctx = RMW->getFunction()->getContext(); + SmallVector SSNs; + Ctx.getSyncScopeNames(SSNs); + auto MemScope = SSNs[RMW->getSyncScopeID()].empty() + ? "system" + : SSNs[RMW->getSyncScopeID()]; + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Passed", RMW->getFunction()) + << "Hardware instruction generated for atomic " + << RMW->getOperationName(RMW->getOperation()) + << " operation at memory scope " << MemScope + << " due to an unsafe request."; + }); + return Kind; +} + TargetLowering::AtomicExpansionKind SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { switch (RMW->getOperation()) { @@ -12154,14 +12175,15 @@ SSID == RMW->getContext().getOrInsertSyncScopeID("one-as")) return AtomicExpansionKind::CmpXChg; - return AtomicExpansionKind::None; + return reportUnsafeHWInst(RMW, AtomicExpansionKind::None); } if (AS == AMDGPUAS::FLAT_ADDRESS) return AtomicExpansionKind::CmpXChg; - return RMW->use_empty() ? AtomicExpansionKind::None - : AtomicExpansionKind::CmpXChg; + return RMW->use_empty() + ? reportUnsafeHWInst(RMW, AtomicExpansionKind::None) + : AtomicExpansionKind::CmpXChg; } // DS FP atomics do repect the denormal mode, but the rounding mode is fixed @@ -12169,13 +12191,13 @@ // The only exception is DS_ADD_F64 which never flushes regardless of mode. if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomics()) { if (!Ty->isDoubleTy()) - return AtomicExpansionKind::None; + return reportUnsafeHWInst(RMW, AtomicExpansionKind::None); return (fpModeMatchesGlobalFPAtomicMode(RMW) || RMW->getFunction() ->getFnAttribute("amdgpu-unsafe-fp-atomics") .getValueAsString() == "true") - ? AtomicExpansionKind::None + ? reportUnsafeHWInst(RMW, AtomicExpansionKind::None) : AtomicExpansionKind::CmpXChg; }