diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp --- a/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp +++ b/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp @@ -152,7 +152,7 @@ } // gfx9 has no to a very limited support for floating-point min and max. if (chipset.majorVersion == 9) { - if (chipset.minorVersion >= 0x0a) { + if (chipset.minorVersion >= 0x0a && chipset.minorVersion != 0x41) { // gfx90a supports f64 max (and min, but we don't have a min wrapper right // now) but all other types need to be emulated. target.addDynamicallyLegalOp( @@ -162,10 +162,18 @@ } else { target.addIllegalOp(); } + if (chipset.minorVersion == 0x41) { + // gfx941 requires non-CAS atomics to be implemented with CAS loops. + // The workaround here mirrors HIP and OpenMP. + target.addIllegalOp(); + } } patterns .add, - RawBufferAtomicByCasPattern>( + RawBufferAtomicByCasPattern, + RawBufferAtomicByCasPattern, + RawBufferAtomicByCasPattern>( patterns.getContext()); }