diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -795,6 +795,7 @@ switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: + // Request L1 MISS_EVICT. Changed |= enableGLCBit(MI); break; case SIAtomicScope::WORKGROUP: @@ -837,8 +838,9 @@ assert(MI->mayLoad() && MI->mayStore()); bool Changed = false; - /// The L1 cache is write through so does not need to be bypassed. There is no - /// bypass control for the L2 cache at the isa level. + /// Do not set GLC for RMW atomic operations as L0/L1 cache is automatically + /// bypassed, and the GLC bit is instead used to indicate if they are + /// return or no-return. return Changed; } @@ -860,6 +862,8 @@ bool Changed = false; if (IsVolatile) { + // Request L1 MISS_EVICT for load instructions. + // Stores will automatically propagate to L2 (write-combine). if (Op == SIMemOp::LOAD) Changed |= enableGLCBit(MI); @@ -875,8 +879,10 @@ } if (IsNonTemporal) { - // Request L1 MISS_EVICT and L2 STREAM for load and store instructions. + // Request L1 MISS_EVICT for load and store instructions. This makes + // L1 write-through for store instructions. Changed |= enableGLCBit(MI); + // Request L2 STREAM. Changed |= enableSLCBit(MI); return Changed; } @@ -1097,6 +1103,7 @@ switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: + // Request L1 MISS_EVICT. Changed |= enableGLCBit(MI); break; case SIAtomicScope::WORKGROUP: @@ -1206,6 +1213,8 @@ bool Changed = false; if (IsVolatile) { + // Request L1 MISS_EVICT for load instructions. + // Stores will automatically propagate to L2 (write-combine). if (Op == SIMemOp::LOAD) Changed |= enableGLCBit(MI); @@ -1221,8 +1230,10 @@ } if (IsNonTemporal) { - // Request L1 MISS_EVICT and L2 STREAM for load and store instructions. + // Request L1 MISS_EVICT for load and store instructions. This makes + // L1 write-through for store instructions. Changed |= enableGLCBit(MI); + // Request L2 STREAM. Changed |= enableSLCBit(MI); return Changed; } @@ -1380,12 +1391,10 @@ bool Changed = false; if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) { - /// TODO Do not set glc for rmw atomic operations as they - /// implicitly bypass the L0/L1 caches. - switch (Scope) { case SIAtomicScope::SYSTEM: case SIAtomicScope::AGENT: + // Request L0 (GLC) and L1 (DLC) MISS_EVICT. Changed |= enableGLCBit(MI); Changed |= enableDLCBit(MI); break; @@ -1434,6 +1443,9 @@ bool Changed = false; if (IsVolatile) { + // Request L0 (GLC) and L1 (DLC) MISS_EVICT for load instructions. + // Stores will automatically propagate to L2 with default policy for L0 + // being MISS_LRU (write-combine) and L1 always bypassed. if (Op == SIMemOp::LOAD) { Changed |= enableGLCBit(MI); Changed |= enableDLCBit(MI); @@ -1450,7 +1462,9 @@ } if (IsNonTemporal) { - // Request L0/L1 HIT_EVICT and L2 STREAM for load and store instructions. + // Request L0 and L1 HIT_EVICT for load instructions, and L2 STREAM for + // load and store instructions. L0 will still be MISS_LRU for store + // instructions unless GLC is set elsewhere. Changed |= enableSLCBit(MI); return Changed; }