diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -7102,36 +7102,51 @@ auto *TRI = MRI.getTargetRegisterInfo(); auto *DefBB = DefMI.getParent(); - const int MaxUseInstScan = 10; - int NumUseInst = 0; + const int MaxUseScan = 10; + int NumUse = 0; - for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) { + for (auto &Use : MRI.use_nodbg_operands(VReg)) { + auto &UseInst = *Use.getParent(); // Don't bother searching between blocks, although it is possible this block // doesn't modify exec. if (UseInst.getParent() != DefBB) return true; - if (++NumUseInst > MaxUseInstScan) + if (++NumUse > MaxUseScan) return true; } + if (NumUse == 0) + return false; + const int MaxInstScan = 20; int NumInst = 0; // Stop scan when we have seen all the uses. for (auto I = std::next(DefMI.getIterator()); ; ++I) { + assert(I != DefBB->end()); + if (I->isDebugInstr()) continue; if (++NumInst > MaxInstScan) return true; - if (I->readsRegister(VReg)) - if (--NumUseInst == 0) - return false; + for (const MachineOperand &Op : I->operands()) { + // We don't check reg masks here as they're used only on calls: + // 1. EXEC is only considered const within one BB + // 2. Call should be a terminator instruction if present in a BB - if (I->modifiesRegister(AMDGPU::EXEC, TRI)) - return true; + if (!Op.isReg()) + continue; + + Register Reg = Op.getReg(); + if (Op.isUse()) { + if (Reg == VReg && --NumUse == 0) + return false; + } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC)) + return true; + } } } diff --git a/llvm/unittests/Target/AMDGPU/CMakeLists.txt b/llvm/unittests/Target/AMDGPU/CMakeLists.txt --- a/llvm/unittests/Target/AMDGPU/CMakeLists.txt +++ b/llvm/unittests/Target/AMDGPU/CMakeLists.txt @@ -13,4 +13,5 @@ add_llvm_target_unittest(AMDGPUTests DwarfRegMappings.cpp + ExecMayBeModifiedBeforeAnyUse.cpp ) diff --git a/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp b/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp new file mode 100755 --- /dev/null +++ b/llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp @@ -0,0 +1,72 @@ +//===- llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUSubtarget.h" +#include "AMDGPUTargetMachine.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; + +// implementation is in the llvm/unittests/Target/AMDGPU/DwarfRegMappings.cpp +std::unique_ptr +createTargetMachine(std::string TStr, StringRef CPU, StringRef FS); + +TEST(AMDGPUExecMayBeModifiedBeforeAnyUse, TheTest) { + auto TM = createTargetMachine("amdgcn-amd-", "gfx906", ""); + if (!TM) + return; + + GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetFeatureString()), *TM); + + LLVMContext Ctx; + Module Mod("Module", Ctx); + Mod.setDataLayout(TM->createDataLayout()); + + auto *Type = FunctionType::get(Type::getVoidTy(Ctx), false); + auto *F = Function::Create(Type, GlobalValue::ExternalLinkage, "Test", &Mod); + + MachineModuleInfo MMI(TM.get()); + auto MF = std::make_unique(*F, *TM, ST, 42, MMI); + auto *BB = MF->CreateMachineBasicBlock(); + MF->push_back(BB); + + auto E = BB->end(); + DebugLoc DL; + const auto &TII = *ST.getInstrInfo(); + auto &MRI = MF->getRegInfo(); + + // create machine IR + Register R = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + + MachineInstr *DefMI = + BuildMI(*BB, E, DL, TII.get(AMDGPU::S_MOV_B32), R).addImm(42).getInstr(); + + auto First = + BuildMI(*BB, E, DL, TII.get(AMDGPU::S_NOP)).addReg(R, RegState::Implicit); + + BuildMI(*BB, E, DL, TII.get(AMDGPU::S_NOP)).addReg(R, RegState::Implicit); + + // this violates the continuous sequence of R's uses for the first S_NOP + First.addReg(R, RegState::Implicit); + +#ifdef DEBUG_THIS_TEST + MF->dump(); + MRI.dumpUses(R); +#endif + + // make sure execMayBeModifiedBeforeAnyUse doesn't crash + ASSERT_FALSE(execMayBeModifiedBeforeAnyUse(MRI, R, *DefMI)); +}