Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -41,6 +41,9 @@ MachineRegisterInfo &MRI, ArrayRef<unsigned> OpIndices) const; + void constrainOpWithReadfirstlane(MachineInstr &MI, MachineRegisterInfo &MRI, + unsigned OpIdx) const; + /// See RegisterBankInfo::applyMapping. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; @@ -70,6 +73,10 @@ const std::array<unsigned, NumOps> RegSrcOpIdx, ArrayRef<OpRegBankEntry<NumOps>> Table) const; + RegisterBankInfo::InstructionMappings + getInstrAlternativeMappingsIntrinsic( + const MachineInstr &MI, const MachineRegisterInfo &MRI) const; + RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsicWSideEffects( const MachineInstr &MI, const MachineRegisterInfo &MRI) const; Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -142,6 +142,28 @@ return AltMappings; } +RegisterBankInfo::InstructionMappings +AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic( + const MachineInstr &MI, const MachineRegisterInfo &MRI) const { + switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + case Intrinsic::amdgcn_readlane: { + static const OpRegBankEntry<3> Table[2] = { + // Perfectly legal. + { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 }, + + // Need a readfirstlane for the index. 
+ { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 } + }; + + const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } }; + return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, makeArrayRef(Table)); + } + + default: + return RegisterBankInfo::getInstrAlternativeMappings(MI); + } +} + RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( const MachineInstr &MI, const MachineRegisterInfo &MRI) const { @@ -365,6 +387,8 @@ AltMappings.push_back(&VMapping); return AltMappings; } + case AMDGPU::G_INTRINSIC: + return getInstrAlternativeMappingsIntrinsic(MI, MRI); case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: return getInstrAlternativeMappingsIntrinsicWSideEffects(MI, MRI); default: @@ -718,6 +742,28 @@ .addReg(SaveExecReg); } +// Legalize an operand that must be an SGPR by inserting a readfirstlane. +void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane( + MachineInstr &MI, MachineRegisterInfo &MRI, unsigned OpIdx) const { + unsigned Reg = MI.getOperand(OpIdx).getReg(); + const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI); + if (Bank != &AMDGPU::VGPRRegBank) + return; + + MachineIRBuilder B(MI); + unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + B.buildInstr(AMDGPU::V_READFIRSTLANE_B32) + .addDef(SGPR) + .addReg(Reg); + + const TargetRegisterClass *Constrained = + constrainGenericRegister(Reg, AMDGPU::VGPR_32RegClass, MRI); + (void)Constrained; + assert(Constrained && "Failed to constrain readfirstlane src reg"); + + MI.getOperand(OpIdx).setReg(SGPR); +} + void AMDGPURegisterBankInfo::applyMappingImpl( const OperandsMapper &OpdMapper) const { MachineInstr &MI = OpdMapper.getMI(); @@ -894,6 +940,27 @@ executeInWaterfallLoop(MI, MRI, { 2 }); return; + case AMDGPU::G_INTRINSIC: { + switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + case Intrinsic::amdgcn_readlane: { + SmallVector<unsigned, 1> SrcReg(OpdMapper.getVRegs(2)); + + if (!SrcReg.empty()) { + 
assert(SrcReg.size() == 1); + MI.getOperand(2).setReg(SrcReg[0]); + } + + assert(empty(OpdMapper.getVRegs(0))); + assert(empty(OpdMapper.getVRegs(3))); + + constrainOpWithReadfirstlane(MI, MRI, 3); // Index + return; + } + default: + break; + } + break; + } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { case Intrinsic::amdgcn_buffer_load: { @@ -1437,6 +1504,21 @@ SrcSize); break; } + case Intrinsic::amdgcn_readlane: { + // This must be an SGPR, but accept a VGPR. + unsigned IdxReg = MI.getOperand(3).getReg(); + unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits(); + unsigned IdxBank = getRegBankID(IdxReg, MRI, *TRI, AMDGPU::SGPRRegBankID); + OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); + LLVM_FALLTHROUGH; + } + case Intrinsic::amdgcn_readfirstlane: { + unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize); + break; + } } break; } Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readfirstlane.mir @@ -0,0 +1,32 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: readfirstlane_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: readfirstlane_s + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: 
[[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 +... + +--- +name: readfirstlane_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: readfirstlane_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), %0 +... Index: test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.readlane.mir @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: readlane_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: readlane_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[COPY1]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 +... 
+ +--- +name: readlane_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + ; CHECK-LABEL: name: readlane_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[COPY1]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 +... + +--- +name: readlane_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: readlane_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1 + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY]](s32), [[V_READFIRSTLANE_B32_]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 +... + +--- +name: readlane_sv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + ; CHECK-LABEL: name: readlane_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec + ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), [[COPY2]](s32), [[V_READFIRSTLANE_B32_]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readlane), %0, %1 +...