diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -117,6 +117,8 @@
     /// vreg that the swifterror should be copied into after the call.
     Register SwiftErrorVReg;
 
+    Register ConvergenceCtrlToken;
+
     /// Original IR callsite corresponding to this call, if available.
     const CallBase *CB = nullptr;
@@ -583,6 +585,7 @@
   bool lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &Call,
                  ArrayRef<Register> ResRegs,
                  ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg,
+                 Register ConvergenceCtrlToken,
                  std::function<unsigned()> GetCalleeReg) const;
 
   /// For targets which want to use big-endian can enable it with
diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h
--- a/llvm/include/llvm/CodeGen/LowLevelType.h
+++ b/llvm/include/llvm/CodeGen/LowLevelType.h
@@ -45,6 +45,16 @@
             /*AddressSpace=*/0};
   }
 
+  /// Get a low-level token; just a scalar with zero bits (or no size).
+  static constexpr LLT token() {
+    return LLT{/*isPointer=*/false,
+               /*isVector=*/false,
+               /*isScalar=*/true,
+               ElementCount::getFixed(0),
+               /*SizeInBits=*/0,
+               /*AddressSpace=*/0};
+  }
+
   /// Get a low-level pointer in the given address space.
   static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits) {
     assert(SizeInBits > 0 && "invalid pointer size");
@@ -288,6 +298,28 @@
   /// described in static const *Field variables. Each of these variables
   /// is a 2-element array, with the first element describing the bitfield size
   /// and the second element describing the bitfield offset.
+  ///
+  /// +--------+---------+--------+----------+-----------------------+
+  /// |isScalar|isPointer|isVector| RawData  | Notes                 |
+  /// +--------+---------+--------+----------+-----------------------+
+  /// |   0    |    0    |   0    |    0     | Invalid               |
+  /// +--------+---------+--------+----------+-----------------------+
+  /// |   0    |    0    |   1    |    0     | Tombstone Key         |
+  /// +--------+---------+--------+----------+-----------------------+
+  /// |   0    |    1    |   0    |    0     | Empty Key             |
+  /// +--------+---------+--------+----------+-----------------------+
+  /// |   1    |    0    |   0    |    0     | Token                 |
+  /// +--------+---------+--------+----------+-----------------------+
+  /// |   1    |    0    |   0    | non-zero | Scalar                |
+  /// +--------+---------+--------+----------+-----------------------+
+  /// |   0    |    1    |   0    | non-zero | Pointer               |
+  /// +--------+---------+--------+----------+-----------------------+
+  /// |   0    |    0    |   1    | non-zero | Vector of non-pointer |
+  /// +--------+---------+--------+----------+-----------------------+
+  /// |   0    |    1    |   1    | non-zero | Vector of pointer     |
+  /// +--------+---------+--------+----------+-----------------------+
+  ///
+  /// Everything else is reserved.
   typedef int BitFieldInfo[2];
   ///
   /// This is how the bitfields are packed per Kind:
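To make the encoding table above concrete, here is a small illustration of the properties the new token type satisfies. This sketch is not part of the patch, and the function name is made up:

    #include "llvm/CodeGen/LowLevelType.h"
    #include <cassert>

    // Sketch only: exercises the token encoding documented in the table above.
    static void tokenEncodingSketch() {
      const llvm::LLT Token = llvm::LLT::token();
      assert(Token.isValid() && "token is valid, unlike the all-zero encoding");
      assert(Token.isScalar() && "token reuses the scalar kind, RawData == 0");
      assert(Token.getSizeInBits() == 0 && "a token carries no data bits");
      assert(Token != llvm::LLT() && "distinct from the invalid (default) LLT");
    }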
diff --git a/llvm/include/llvm/CodeGen/MachineConvergenceVerifier.h b/llvm/include/llvm/CodeGen/MachineConvergenceVerifier.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineConvergenceVerifier.h
@@ -0,0 +1,28 @@
+//===- MachineConvergenceVerifier.h - Verify convergencectrl ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares the GMIR specialization of the
+/// GenericConvergenceVerifier template.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINECONVERGENCEVERIFIER_H
+#define LLVM_CODEGEN_MACHINECONVERGENCEVERIFIER_H
+
+#include "llvm/ADT/GenericConvergenceVerifier.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+
+namespace llvm {
+
+using MachineConvergenceVerifier =
+    GenericConvergenceVerifier<MachineSSAContext>;
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINECONVERGENCEVERIFIER_H
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -121,6 +121,7 @@
   MachineBranchProbabilityInfo.cpp
   MachineCFGPrinter.cpp
   MachineCombiner.cpp
+  MachineConvergenceVerifier.cpp
   MachineCopyPropagation.cpp
   MachineCSE.cpp
   MachineCheckDebugify.cpp
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Target/TargetMachine.h"
@@ -87,10 +88,20 @@
   });
 }
 
+// A call is compatible with tail-calling only if it is "uncontrolled" (has no
+// convergencectrl bundle) or if its token comes directly from
+// llvm.experimental.convergence.entry, i.e. it continues the caller's
+// convergence region.
+static bool hasConvergenceEntryToken(const CallBase &CB) {
+  auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl);
+  if (!Bundle)
+    return true;
+  auto *Token = Bundle->Inputs[0].get();
+  auto *Def = cast<IntrinsicInst>(Token);
+  return Def->getIntrinsicID() == Intrinsic::experimental_convergence_entry;
+}
+
 bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
                              ArrayRef<Register> ResRegs,
                              ArrayRef<ArrayRef<Register>> ArgRegs,
                              Register SwiftErrorVReg,
+                             Register ConvergenceCtrlToken,
                              std::function<unsigned()> GetCalleeReg) const {
   CallLoweringInfo Info;
   const DataLayout &DL = MIRBuilder.getDataLayout();
@@ -121,6 +132,8 @@
     CanBeTailCalled = false;
   }
 
+  if (!hasConvergenceEntryToken(CB))
+    CanBeTailCalled = false;
 
   // First step is to marshall all the function's parameters into the correct
   // physregs and memory locations. Gather the sequence of argument types that
@@ -176,6 +189,7 @@
   Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
   Info.CallConv = CallConv;
   Info.SwiftErrorVReg = SwiftErrorVReg;
+  Info.ConvergenceCtrlToken = ConvergenceCtrlToken;
   Info.IsMustTailCall = CB.isMustTailCall();
   Info.IsTailCall = CanBeTailCalled;
   Info.IsVarArg = IsVarArg;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -210,8 +210,9 @@
   auto *VRegs = VMap.getVRegs(Val);
   auto *Offsets = VMap.getOffsets(Val);
 
-  assert(Val.getType()->isSized() &&
-         "Don't know how to create an empty vreg");
+  if (!Val.getType()->isTokenTy())
+    assert(Val.getType()->isSized() &&
+           "Don't know how to create an empty vreg");
 
   SmallVector<LLT, 4> SplitTys;
   computeValueLLTs(*DL, *Val.getType(), SplitTys,
@@ -2456,12 +2457,18 @@
     }
   }
 
+  Register ConvergenceCtrlToken = 0;
+  if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+    auto *Token = Bundle->Inputs[0].get();
+    ConvergenceCtrlToken = getOrCreateVReg(*Token);
+  }
+
   // We don't set HasCalls on MFI here yet because call lowering may decide to
   // optimize into tail calls. Instead, we defer that to selection where a final
   // scan is done to check if any instructions are calls.
-  bool Success =
-      CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
-                     [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
+  bool Success = CLI->lowerCall(
+      MIRBuilder, CB, Res, Args, SwiftErrorVReg, ConvergenceCtrlToken,
+      [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
 
   // Check if we just inserted a tail call.
   if (Success) {
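For context, this is the IR-side shape the hunk above consumes: a token produced by a convergence intrinsic, attached to a convergent call through the "convergencectrl" operand bundle. A minimal builder sketch follows; it is illustrative only, and buildConvergentCall is a made-up helper name:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // Sketch: emit %tok = call token @llvm.experimental.convergence.anchor()
    // followed by a call carrying [ "convergencectrl"(token %tok) ].
    static CallInst *buildConvergentCall(IRBuilder<> &B, Module &M,
                                         FunctionCallee Callee,
                                         ArrayRef<Value *> Args) {
      Function *Anchor = Intrinsic::getDeclaration(
          &M, Intrinsic::experimental_convergence_anchor);
      CallInst *Token = B.CreateCall(Anchor);
      // This is the bundle that getOperandBundle(OB_convergencectrl) finds;
      // its sole input becomes ConvergenceCtrlToken in the IRTranslator.
      OperandBundleDef Bundle("convergencectrl", std::vector<Value *>{Token});
      return B.CreateCall(Callee, Args, {Bundle});
    }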
@@ -2509,8 +2516,13 @@
 
   assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
 
-  if (translateKnownIntrinsic(CI, ID, MIRBuilder))
+  // Note: Known intrinsics are target-independent and not expected to be
+  // convergent. Hence we don't look for a convergencectrl operand bundle if
+  // we are calling a known intrinsic.
+  if (translateKnownIntrinsic(CI, ID, MIRBuilder)) {
+    assert(!CI.countOperandBundlesOfType(LLVMContext::OB_convergencectrl));
     return true;
+  }
 
   ArrayRef<Register> ResultRegs;
   if (!CI.getType()->isVoidTy())
@@ -2575,6 +2587,14 @@
         MF->getMachineMemOperand(MPI, Info.flags, MemTy, Alignment,
                                  CI.getAAMetadata()));
   }
 
+  if (CI.isConvergent()) {
+    if (auto Bundle = CI.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+      auto *Token = Bundle->Inputs[0].get();
+      Register TokenReg = getOrCreateVReg(*Token);
+      MIB.addUse(TokenReg, RegState::Implicit);
+    }
+  }
+
   return true;
 }
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -594,6 +594,14 @@
     }
   }
 
+  if (auto Bundle = Call.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+    auto *Token = Bundle->Inputs[0].get();
+    ArrayRef<Register> SourceRegs = GetOrCreateVRegs(*Token);
+    assert(SourceRegs.size() == 1 &&
+           "Expected the control token to fit into a single virtual register");
+    Inst.addUse(SourceRegs[0], RegState::Implicit);
+  }
+
   if (const MDNode *SrcLoc = Call.getMetadata("srcloc"))
     Inst.addMetadata(SrcLoc);
 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -119,8 +119,27 @@
   MIRBuilder.setInstrAndDebugLoc(MI);
 
-  if (isa<GIntrinsic>(MI))
+  if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
+    auto ID = GI->getIntrinsicID();
+    assert(ID != Intrinsic::not_intrinsic);
+    switch (ID) {
+    default:
+      break;
+    case Intrinsic::experimental_convergence_anchor:
+    case Intrinsic::experimental_convergence_entry:
+    case Intrinsic::experimental_convergence_loop: {
+      // The fallback lowering has no use for convergence tokens: drop every
+      // use of the token, then erase the defining intrinsic itself.
+      assert(MI.getNumDefs() == 1);
+      Register Token = MI.defs().begin()->getReg();
+      for (auto &Use : make_early_inc_range(MRI.use_operands(Token))) {
+        auto *UserInstr = Use.getParent();
+        UserInstr->removeOperand(Use.getOperandNo());
+      }
+      MI.eraseFromParent();
+      return Legalized;
+    }
+    }
+    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
+  }
 
   auto Step = LI.getAction(MI, MRI);
   switch (Step.Action) {
   case Legal:
diff --git a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
--- a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
+++ b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
@@ -39,6 +39,10 @@
     return LLT::scalar(SizeInBits);
   }
 
+  if (Ty.isTokenTy()) {
+    return LLT::token();
+  }
+
   return LLT();
 }
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1921,10 +1921,13 @@
   if (Token.range().front() == 's') {
     auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
-    if (!verifyScalarSize(ScalarSize))
-      return error("invalid size for scalar type");
-
-    Ty = LLT::scalar(ScalarSize);
+    if (ScalarSize) {
+      if (!verifyScalarSize(ScalarSize))
+        return error("invalid size for scalar type");
+      Ty = LLT::scalar(ScalarSize);
+    } else {
+      Ty = LLT::token();
+    }
     lex();
     return false;
   } else if (Token.range().front() == 'p') {
@@ -1965,7 +1968,7 @@
   if (Token.range().front() == 's') {
     auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
     if (!verifyScalarSize(ScalarSize))
-      return error("invalid size for scalar type");
+      return error("invalid size for scalar element in vector");
     Ty = LLT::scalar(ScalarSize);
   } else if (Token.range().front() == 'p') {
     const DataLayout &DL = MF.getDataLayout();
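Taken together with the LowLevelTypeUtils.cpp change above, IR token values now round-trip through GMIR: getLLTForType maps them to LLT::token(), a zero-sized scalar that prints as "s0", and the parser change accepts that spelling back. A tiny sketch, illustrative and not part of the patch:

    #include "llvm/CodeGen/LowLevelTypeUtils.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    #include <cassert>

    // Sketch: IR token values now map to a real LLT instead of an invalid one.
    static void tokenMappingSketch() {
      llvm::LLVMContext Ctx;
      const llvm::DataLayout DL("");
      llvm::LLT Ty = llvm::getLLTForType(*llvm::Type::getTokenTy(Ctx), DL);
      assert(Ty == llvm::LLT::token() && "token IR values map to LLT::token()");
    }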
diff --git a/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp b/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineConvergenceVerifier.cpp
@@ -0,0 +1,79 @@
+//===- MachineConvergenceVerifier.cpp - Verify convergence -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineConvergenceVerifier.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAContext.h"
+#include "llvm/IR/GenericConvergenceVerifierImpl.h"
+
+using namespace llvm;
+
+template <>
+const MachineInstr *
+GenericConvergenceVerifier<MachineSSAContext>::findAndCheckConvergenceTokenUsed(
+    const MachineInstr &MI) {
+  auto &MRI = Context.getFunction()->getRegInfo();
+  const MachineInstr *TokenDef = nullptr;
+
+  for (auto &MO : MI.uses()) {
+    if (!MO.isReg())
+      continue;
+
+    const auto RegTy = MRI.getType(MO.getReg());
+    if (RegTy != LLT::token())
+      continue;
+
+    // A token-typed operand is a convergence control token iff its unique
+    // definition is a convergence control intrinsic. We can't really verify
+    // that since the token type may have other implicit uses. Instead we use
+    // it as a way to identify convergence control token operands.
+    const auto *Def = MRI.getUniqueVRegDef(MO.getReg());
+    if (!Def)
+      continue;
+    if (!isConvergenceControlIntrinsic(
+            MachineSSAContext::getIntrinsicID(*Def)))
+      continue;
+
+    CheckOrNull(MI.isCall() || isa<GIntrinsic>(MI),
+                "Convergence control tokens can only be used by call "
+                "instructions or intrinsics.",
+                {Context.print(MO.getReg()), Context.print(&MI)});
+
+    CheckOrNull(MO.isImplicit(),
+                "Convergence control tokens can only be used implicitly.",
+                {Context.print(MO.getReg()), Context.print(&MI)});
+
+    CheckOrNull(!TokenDef,
+                "A call can use at most one convergence control token.",
+                {Context.print(MO.getReg()), Context.print(&MI)});
+
+    TokenDef = Def;
+  }
+
+  if (TokenDef)
+    Tokens[&MI] = TokenDef;
+
+  return TokenDef;
+}
+
+template <>
+bool GenericConvergenceVerifier<MachineSSAContext>::isInsideConvergentFunction(
+    const MachineInstr &MI) {
+  // The class MachineFunction does not have any property to indicate whether
+  // it is convergent. Trivially return true so that the check always passes.
+  return true;
+}
+
+template <>
+bool GenericConvergenceVerifier<MachineSSAContext>::isConvergent(
+    const MachineInstr &MI) {
+  return MI.isConvergent();
+}
+
+template class llvm::GenericConvergenceVerifier<MachineSSAContext>;
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -40,6 +40,8 @@
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/LowLevelType.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConvergenceVerifier.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -214,6 +216,11 @@
     LiveStacks *LiveStks = nullptr;
     SlotIndexes *Indexes = nullptr;
 
+    // This is calculated only when trying to verify convergence control
+    // tokens. Similar to the LLVM IR verifier, we calculate this locally
+    // instead of relying on the pass manager.
+    MachineDomTree DT;
+
     void visitMachineFunctionBefore();
     void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
     void visitMachineBundleBefore(const MachineInstr *MI);
@@ -2907,7 +2914,33 @@
   }
 }
 
+static void
+verifyConvergenceControl(const MachineFunction &MF, MachineDomTree &DT,
+                         std::function<void(const Twine &Message)> FailureCB) {
+  MachineConvergenceVerifier CV;
+  CV.initialize(&errs(), FailureCB, MF);
+
+  if (MF.getProperties().hasProperty(
+          MachineFunctionProperties::Property::Selected))
+    return;
+
+  for (const auto &MBB : MF) {
+    for (const auto &MI : MBB.instrs())
+      CV.visit(MI);
+  }
+
+  if (CV.sawTokens()) {
+    DT.recalculate(const_cast<MachineFunction &>(MF));
+    CV.verify(DT);
+  }
+}
+
 void MachineVerifier::visitMachineFunctionAfter() {
+  auto FailureCB = [this](const Twine &Message) {
+    report(Message.str().c_str(), MF);
+  };
+  verifyConvergenceControl(*MF, DT, FailureCB);
+
   calcRegsPassed();
 
   for (const MachineBasicBlock &MBB : *MF)
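The intended calling sequence for the new verifier is the one used above: initialize, one visit per instruction, then a dominance-based verify only when tokens were actually seen. A minimal standalone driver might look like this; it is a hypothetical sketch (checkConvergence is a made-up name), using only the API calls exercised by the patch:

    #include "llvm/ADT/Twine.h"
    #include "llvm/CodeGen/MachineConvergenceVerifier.h"
    #include "llvm/CodeGen/MachineDominators.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Sketch: run the convergence verifier over a function and report success.
    static bool checkConvergence(const MachineFunction &MF, MachineDomTree &DT) {
      bool Failed = false;
      auto FailureCB = [&](const Twine &) { Failed = true; };

      MachineConvergenceVerifier CV;
      CV.initialize(&errs(), FailureCB, MF);
      for (const MachineBasicBlock &MBB : MF)
        for (const MachineInstr &MI : MBB.instrs())
          CV.visit(MI);

      // The dominator tree is only needed if any tokens were seen.
      if (CV.sawTokens()) {
        DT.recalculate(const_cast<MachineFunction &>(MF));
        CV.verify(DT);
      }
      return !Failed;
    }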
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1267,6 +1267,9 @@
   if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
     return false;
 
+  if (Info.ConvergenceCtrlToken) {
+    MIB.addUse(Info.ConvergenceCtrlToken, RegState::Implicit);
+  }
   handleImplicitCallArguments(MIRBuilder, MIB, ST, *FuncInfo, ImplicitArgRegs);
 
   // If we have -tailcallopt, we need to adjust the stack. We'll do the call
@@ -1393,6 +1396,9 @@
 
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
+  if (Info.ConvergenceCtrlToken) {
+    MIB.addUse(Info.ConvergenceCtrlToken, RegState::Implicit);
+  }
   handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, ImplicitArgRegs);
 
   // Get a count of how many bytes are to be pushed on the stack.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1022,6 +1022,10 @@
 bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
   unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
   switch (IntrinsicID) {
+  case Intrinsic::experimental_convergence_anchor:
+  case Intrinsic::experimental_convergence_entry:
+  case Intrinsic::experimental_convergence_loop:
+    return true;
   case Intrinsic::amdgcn_if_break: {
     MachineBasicBlock *BB = I.getParent();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/convergence-tokens.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/convergence-tokens.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/convergence-tokens.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+
+define void @test_readfirstlane(ptr addrspace(1) %out, i32 %src) #1 {
+; CHECK-LABEL: test_readfirstlane:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_readfirstlane_b32 s4, v2
+; CHECK-NEXT:    v_mov_b32_e32 v2, s4
+; CHECK-NEXT:    global_store_dword v[0:1], v2, off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %t = call token @llvm.experimental.convergence.anchor()
+  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t) ]
+  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+declare i32 @llvm.amdgcn.readfirstlane(i32) #0
+
+declare token @llvm.experimental.convergence.entry()
+declare token @llvm.experimental.convergence.anchor()
+declare token @llvm.experimental.convergence.loop()
+
+attributes #0 = { nounwind readnone convergent }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-convergence-tokens.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-convergence-tokens.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-convergence-tokens.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s
+
+define void @test_readfirstlane(ptr addrspace(1) %out, i32 %src) #1 {
+  ; CHECK-LABEL: name: test_readfirstlane
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.anchor)
+  ; CHECK-NEXT:   [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY2]](s32), implicit [[INTRINSIC_CONVERGENT]](s0)
+  ; CHECK-NEXT:   G_STORE [[INTRINSIC_CONVERGENT1]](s32), [[MV]](p1) :: (store (s32) into %ir.out, addrspace 1)
+  ; CHECK-NEXT:   SI_RETURN
+  %t = call token @llvm.experimental.convergence.anchor()
+  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %src) [ "convergencectrl"(token %t) ]
+  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+declare i32 @llvm.amdgcn.readfirstlane(i32) #0
+
+declare token @llvm.experimental.convergence.entry()
+declare token @llvm.experimental.convergence.anchor()
+declare token @llvm.experimental.convergence.loop()
+
+attributes #0 = { nounwind readnone convergent }
+attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/basic.mir b/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/basic.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/basic.mir
@@ -0,0 +1,42 @@
+# RUN: not --crash llc -mtriple=amdgcn-- -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck %s
+---
+name: basic
+tracksRegLiveness: true
+body: |
+  bb.0:
+    %0:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.anchor)
+    ; It's impossible to check that no token is passed to entry, since the
+    ; verifier will return when it sees that the entry is not the first
+    ; instruction in the function.
+    ; CHECK: Entry intrinsic can occur only at the start of the basic block.
+    ; CHECK: G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.entry)
+    %1:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.entry), implicit %0:_(s0)
+    ; CHECK: Loop intrinsic must have a convergencectrl token operand.
+    ; CHECK: G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop)
+    %2:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop)
+    ; CHECK: Loop intrinsic can occur only at the start of the basic block.
+    ; CHECK: G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop)
+    %3:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop), implicit %0:_(s0)
+    ; CHECK: Convergence control tokens can only be used implicitly.
+    ; CHECK: G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop)
+    %4:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop), %0:_(s0)
+    %5:_(s1) = IMPLICIT_DEF
+    G_BRCOND %6:sgpr_64(s1), %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    ; CHECK: Entry intrinsic can occur only in the entry block.
+    ; CHECK: G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.entry)
+    %7:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.entry)
+
+  bb.2:
+    ; CHECK: Convergence control tokens can only be used by call instructions or intrinsics.
+    ; CHECK: G_PHI
+    %8:_(s0) = G_PHI %0:_(s0), %bb.0, %0:_(s0), %bb.1
+    %9:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.anchor)
+    %10:sgpr_64 = IMPLICIT_DEF
+    %6:sgpr_64(s1) = G_SI_CALL %10:sgpr_64, 1, implicit %9:_(s0)
+    %11:sgpr_64 = G_SI_CALL %10:sgpr_64, 2, implicit %9:_(s0), implicit %9:_(s0)
+    %12:sgpr_64 = G_SI_CALL %10:sgpr_64, 3
+    %13:sgpr_64 = G_SI_CALL %10:sgpr_64, 4, implicit %9:_(s0)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/cycles.mir b/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/cycles.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/cycles.mir
@@ -0,0 +1,53 @@
+# RUN: not --crash llc -mtriple=amdgcn-- -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck %s
+---
+name: cycles
+body: |
+  bb.0:
+    %0:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.anchor)
+    %1:_(s1) = IMPLICIT_DEF
+    %2:_(s1) = IMPLICIT_DEF
+    G_BRCOND %2:_(s1), %bb.9
+    G_BR %bb.1
+
+  bb.1:
+    G_BRCOND %2:_(s1), %bb.8
+    G_BR %bb.5
+
+  bb.2:
+    G_BRCOND %2:_(s1), %bb.3
+    G_BR %bb.4
+
+  bb.3:
+    ; CHECK: Cycle heart must dominate all blocks in the cycle.
+    ; Irreducible cycle: entries(bb.4 bb.3)
+    %3:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop), implicit %0:_(s0)
+    G_BR %bb.4
+
+  bb.4:
+    G_BR %bb.3
+
+  bb.5:
+    G_BRCOND %2:_(s1), %bb.6
+    G_BR %bb.2
+
+  bb.6:
+    G_BR %bb.7
+
+  bb.7:
+    ; CHECK: Cycle heart must dominate all blocks in the cycle.
+    ; Reducible cycle: entries(bb.6) bb.7
+    %4:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop), implicit %0:_(s0)
+    G_BR %bb.6
+
+  bb.8:
+    ; CHECK: Two static convergence token uses in a cycle that does not contain either token's definition.
+    %5:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop), implicit %0:_(s0)
+    %6:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.loop), implicit %0:_(s0)
+    G_BR %bb.8
+
+  bb.9:
+    ; CHECK: Convergence token used by an instruction other than llvm.experimental.convergence.loop in a cycle that does not contain the token's definition.
+    %7:sgpr_64 = G_SI_CALL %1:_(s1), 3, implicit %0:_(s0)
+    G_BR %bb.9
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/mixed2.mir b/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/mixed2.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/mixed2.mir
@@ -0,0 +1,15 @@
+# RUN: not --crash llc -mtriple=amdgcn-- -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck %s
+---
+name: mixed2
+body: |
+  bb.0:
+    %0:sgpr_64 = IMPLICIT_DEF
+    %1:sgpr_64 = G_SI_CALL %0, 1
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.anchor)
+    %2:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.anchor)
+    ; CHECK: Cannot mix controlled and uncontrolled convergence in the same function.
+    ; CHECK: G_SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %3:sgpr_64 = G_SI_CALL %0, 2, implicit %2(s0)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/region-nesting.mir b/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/region-nesting.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/verify-convergencectrl/region-nesting.mir
@@ -0,0 +1,25 @@
+# RUN: not --crash llc -mtriple=amdgcn-- -run-pass=machineverifier -o /dev/null %s 2>&1 | FileCheck %s
+---
+name: region_nesting
+body: |
+  bb.0:
+    %0:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.anchor)
+    %1:_(s0) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.experimental.convergence.anchor)
+    %2:sgpr_64 = IMPLICIT_DEF
+    %3:sgpr_64 = G_SI_CALL %2, 1, implicit %0(s0)
+    ; CHECK: Convergence region is not well-nested.
+    ; CHECK: G_SI_CALL %{{[0-9]}}:sgpr_64, 2
+    %7:sgpr_64 = G_SI_CALL %2, 2, implicit %1(s0)
+    %4:_(s1) = IMPLICIT_DEF
+    G_BRCOND %4(s1), %bb.1
+    G_BR %bb.2
+
+  bb.1:
+    %5:sgpr_64 = G_SI_CALL %2, 3, implicit %0(s0)
+
+  bb.2:
+    ; CHECK: Convergence region is not well-nested.
+    ; CHECK: G_SI_CALL %{{[0-9]}}:sgpr_64, 4
+    %6:sgpr_64 = G_SI_CALL %2, 4, implicit %1(s0)
+
+...
diff --git a/llvm/test/CodeGen/MIR/AArch64/parse-low-level-type-invalid4.mir b/llvm/test/CodeGen/MIR/AArch64/parse-low-level-type-invalid4.mir
deleted file mode 100644
--- a/llvm/test/CodeGen/MIR/AArch64/parse-low-level-type-invalid4.mir
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: not llc -mtriple=aarch64-- -run-pass none -o /dev/null %s 2>&1 | FileCheck %s
-# When a low-level type is 0 bits
----
-name: test_scalar_size_0
-body: |
-  bb.0:
-    liveins: $x0
-    ; CHECK: [[@LINE+1]]:10: invalid size for scalar type
-    %0:_(s0) = G_IMPLICIT_DEF
-...
diff --git a/llvm/test/CodeGen/MIR/AArch64/parse-low-level-type-invalid6.mir b/llvm/test/CodeGen/MIR/AArch64/parse-low-level-type-invalid6.mir
--- a/llvm/test/CodeGen/MIR/AArch64/parse-low-level-type-invalid6.mir
+++ b/llvm/test/CodeGen/MIR/AArch64/parse-low-level-type-invalid6.mir
@@ -5,6 +5,6 @@
 body: |
   bb.0:
     liveins: $x0
-    ; CHECK: [[@LINE+1]]:15: invalid size for scalar type
+    ; CHECK: [[@LINE+1]]:15: invalid size for scalar element in vector
     %0:_(<2 x s0>) = G_IMPLICIT_DEF
 ...