Index: lib/Target/AMDGPU/SIMemoryLegalizer.cpp
===================================================================
--- lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -53,20 +53,28 @@
     SyncScope::ID SSID = SyncScope::System;
     AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent;
     AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent;
+    bool IsAtomic = true;
+    bool IsNonTemporal = false;
 
     MemOpInfo() = default;
 
     MemOpInfo(SyncScope::ID SSID,
               AtomicOrdering Ordering,
-              AtomicOrdering FailureOrdering)
+              AtomicOrdering FailureOrdering,
+              bool IsAtomic = true,
+              bool IsNonTemporal = false)
         : SSID(SSID),
           Ordering(Ordering),
-          FailureOrdering(FailureOrdering) {}
+          FailureOrdering(FailureOrdering),
+          IsAtomic(IsAtomic),
+          IsNonTemporal(IsNonTemporal) {}
 
     MemOpInfo(const MachineMemOperand *MMO)
         : SSID(MMO->getSyncScopeID()),
           Ordering(MMO->getOrdering()),
-          FailureOrdering(MMO->getFailureOrdering()) {}
+          FailureOrdering(MMO->getFailureOrdering()),
+          IsAtomic(MMO->isAtomic()),
+          IsNonTemporal(MMO->isNonTemporal()) {}
   };
 
   /// \brief LLVM context.
@@ -87,6 +95,34 @@
   /// \brief List of atomic pseudo instructions.
   std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
 
+  /// \brief Sets named bit (BitName) to "true" if present in \p MI. Returns
+  /// true if \p MI is modified, false otherwise.
+  template <uint16_t BitName>
+  bool enableNamedBit(const MachineBasicBlock::iterator &MI) const {
+    int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
+    if (BitIdx == -1)
+      return false;
+
+    MachineOperand &Bit = MI->getOperand(BitIdx);
+    if (Bit.getImm() == 1)
+      return false;
+
+    Bit.setImm(1);
+    return true;
+  }
+
+  /// \brief Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
+  /// is modified, false otherwise.
+  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
+    return enableNamedBit<AMDGPU::OpName::glc>(MI);
+  }
+
+  /// \brief Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
+  /// is modified, false otherwise.
+  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
+    return enableNamedBit<AMDGPU::OpName::slc>(MI);
+  }
+
   /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
   /// Always returns true.
   bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
@@ -96,10 +132,6 @@
   bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
                            bool Before = true) const;
 
-  /// \brief Sets GLC bit if present in \p MI. Returns true if \p MI is
-  /// modified, false otherwise.
-  bool setGLC(const MachineBasicBlock::iterator &MI) const;
-
   /// \brief Removes all processed atomic pseudo instructions from the current
   /// function. Returns true if current function is modified, false otherwise.
   bool removeAtomicPseudoMIs();
@@ -195,19 +227,6 @@
   return true;
 }
 
-bool SIMemoryLegalizer::setGLC(const MachineBasicBlock::iterator &MI) const {
-  int GLCIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::glc);
-  if (GLCIdx == -1)
-    return false;
-
-  MachineOperand &GLC = MI->getOperand(GLCIdx);
-  if (GLC.getImm() == 1)
-    return false;
-
-  GLC.setImm(1);
-  return true;
-}
-
 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
   if (AtomicPseudoMIs.empty())
     return false;
@@ -236,7 +255,7 @@
     return MemOpInfo();
 
   const MachineMemOperand *MMO = *MI->memoperands_begin();
-  if (!MMO->isAtomic())
+  if (!MMO->isAtomic() && !MMO->isNonTemporal())
     return None;
 
   return MemOpInfo(MMO);
@@ -252,7 +271,7 @@
     return MemOpInfo();
 
   const MachineMemOperand *MMO = *MI->memoperands_begin();
-  if (!MMO->isAtomic())
+  if (!MMO->isAtomic() && !MMO->isNonTemporal())
     return None;
 
   return MemOpInfo(MMO);
@@ -313,11 +332,20 @@
   assert(MI->mayLoad() && !MI->mayStore());
 
   bool Changed = false;
+  if (MOI.IsNonTemporal) {
+    // FIXME: handle non-temporal atomic loads?
+    assert(!MOI.IsAtomic);
+
+    Changed |= enableSLCBit(MI);
+    Changed |= enableGLCBit(MI);
+    return Changed;
+  }
+
   if (MOI.SSID == SyncScope::System ||
       MOI.SSID == MMI->getAgentSSID()) {
     if (MOI.Ordering == AtomicOrdering::Acquire ||
         MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
-      Changed |= setGLC(MI);
+      Changed |= enableGLCBit(MI);
 
     if (MOI.Ordering == AtomicOrdering::SequentiallyConsistent)
       Changed |= insertWaitcntVmcnt0(MI);
@@ -344,6 +372,15 @@
   assert(!MI->mayLoad() && MI->mayStore());
 
   bool Changed = false;
+  if (MOI.IsNonTemporal) {
+    // FIXME: handle non-temporal atomic stores?
+    assert(!MOI.IsAtomic);
+
+    Changed |= enableSLCBit(MI);
+    Changed |= enableGLCBit(MI);
+    return Changed;
+  }
+
   if (MOI.SSID == SyncScope::System ||
       MOI.SSID == MMI->getAgentSSID()) {
     if (MOI.Ordering == AtomicOrdering::Release ||
@@ -418,7 +455,7 @@
   } else if (MOI.SSID == SyncScope::SingleThread ||
              MOI.SSID == MMI->getWorkgroupSSID() ||
              MOI.SSID == MMI->getWavefrontSSID()) {
-    Changed |= setGLC(MI);
+    Changed |= enableGLCBit(MI);
     return Changed;
   } else {
     reportUnknownSynchScope(MI);
@@ -449,7 +486,7 @@
   } else if (MOI.SSID == SyncScope::SingleThread ||
              MOI.SSID == MMI->getWorkgroupSSID() ||
              MOI.SSID == MMI->getWavefrontSSID()) {
-    Changed |= setGLC(MI);
+    Changed |= enableGLCBit(MI);
     return Changed;
   } else {
     reportUnknownSynchScope(MI);
Index: test/CodeGen/AMDGPU/memory-legalizer-nontemporal-load.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/memory-legalizer-nontemporal-load.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}nontemporal_load
+; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}}
+define amdgpu_kernel void @nontemporal_load(
+    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+entry:
+  %val = load i32, i32 addrspace(4)* %in, align 4, !nontemporal !0
+  store i32 %val, i32 addrspace(4)* %out
+  ret void
+}
+
+!0 = !{i32 1}
Index: test/CodeGen/AMDGPU/memory-legalizer-nontemporal-store.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/memory-legalizer-nontemporal-store.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}nontemporal_store
+; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc slc{{$}}
+define amdgpu_kernel void @nontemporal_store(
+    i32 %in, i32 addrspace(4)* %out) {
+entry:
+  store i32 %in, i32 addrspace(4)* %out, align 4, !nontemporal !0
+  ret void
+}
+
+!0 = !{i32 1}