Index: lib/Target/AMDGPU/AMDGPUGISel.td
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPUGISel.td
@@ -0,0 +1,47 @@
+//===-- AMDGPUGISel.td - AMDGPU GlobalISel Patterns---------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file contains patterns that should only be used by GlobalISel.  For
+// example patterns for V_* instructions that have S_* equivalents.
+// SelectionDAG does not support selecting V_* instructions.
+//===----------------------------------------------------------------------===//
+
+include "AMDGPU.td"
+
+def sd_ssrc : ComplexPattern<i32, 1, "">;
+def gi_ssrc :
+    GIComplexOperandMatcher<s32, "selectSSRC">,
+    GIComplexPatternEquiv<sd_ssrc>;
+
+def sd_vsrc0 : ComplexPattern<i32, 1, "">;
+def gi_vsrc0 :
+    GIComplexOperandMatcher<s32, "selectVSRC0">,
+    GIComplexPatternEquiv<sd_vsrc0>;
+
+class GISelSop2Pat <
+  SDPatternOperator node,
+  Instruction inst,
+  ValueType dst_vt,
+  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat <
+
+  (dst_vt (node (src0_vt (sd_ssrc src0_vt:$src0)), (src1_vt (sd_ssrc src1_vt:$src1)))),
+  (inst src0_vt:$src0, src1_vt:$src1)
+>;
+
+class GISelVop2Pat <
+  SDPatternOperator node,
+  Instruction inst,
+  ValueType dst_vt,
+  ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt>   : GCNPat <
+
+  (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))),
+  (inst src0_vt:$src0, src1_vt:$src1)
+>;
+
+def : GISelSop2Pat <or, S_OR_B32, i32>;
+def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -19,10 +19,17 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 
+namespace {
+#define GET_GLOBALISEL_PREDICATE_BITSET
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATE_BITSET
+}
+
 namespace llvm {
 
 class AMDGPUInstrInfo;
 class AMDGPURegisterBankInfo;
+class AMDGPUSubtarget;
 class MachineInstr;
 class MachineOperand;
 class MachineRegisterInfo;
@@ -33,9 +40,11 @@
 class AMDGPUInstructionSelector : public InstructionSelector {
 public:
   AMDGPUInstructionSelector(const SISubtarget &STI,
-                            const AMDGPURegisterBankInfo &RBI);
+                            const AMDGPURegisterBankInfo &RBI,
+                            const AMDGPUTargetMachine &TM);
 
   bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override;
+  static const char *getName();
 
 private:
   struct GEPInfo {
@@ -46,6 +55,9 @@
     GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
   };
 
+  /// tblgen-erated 'select' implementation.
+  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
+
   MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
   bool selectG_CONSTANT(MachineInstr &I) const;
   bool selectG_ADD(MachineInstr &I) const;
@@ -57,9 +69,29 @@
   bool selectG_LOAD(MachineInstr &I) const;
   bool selectG_STORE(MachineInstr &I) const;
 
+  InstructionSelector::ComplexRendererFns
+  selectImm(MachineOperand &Root) const;
+
+  InstructionSelector::ComplexRendererFns
+  selectVSRC0(MachineOperand &Root) const;
+
+  InstructionSelector::ComplexRendererFns
+  selectSSRC(MachineOperand &Root) const;
+
   const SIInstrInfo &TII;
   const SIRegisterInfo &TRI;
   const AMDGPURegisterBankInfo &RBI;
+  const AMDGPUTargetMachine &TM;
+  const SISubtarget &STI;
+  bool EnableLateStructurizeCFG;
+#define GET_GLOBALISEL_PREDICATES_DECL
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_DECL
+
+#define GET_GLOBALISEL_TEMPORARIES_DECL
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_DECL
+
 protected:
   AMDGPUAS AMDGPUASI;
 };
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -17,6 +17,9 @@
 #include "AMDGPURegisterBankInfo.h"
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -31,10 +34,28 @@
 
 using namespace llvm;
 
+#define GET_GLOBALISEL_IMPL
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_IMPL
+
 AMDGPUInstructionSelector::AMDGPUInstructionSelector(
-    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
+    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI,
+    const AMDGPUTargetMachine &TM)
     : InstructionSelector(), TII(*STI.getInstrInfo()),
-      TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {}
+      TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
+      STI(STI),
+      EnableLateStructurizeCFG(AMDGPUTargetMachine::EnableLateStructurizeCFG),
+#define GET_GLOBALISEL_PREDICATES_INIT
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_PREDICATES_INIT
+#define GET_GLOBALISEL_TEMPORARIES_INIT
+#include "AMDGPUGenGlobalISel.inc"
+#undef GET_GLOBALISEL_TEMPORARIES_INIT
+      ,AMDGPUASI(STI.getAMDGPUAS())
+{
+}
+
+const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
 
 MachineOperand
 AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
@@ -416,6 +437,8 @@
   switch (I.getOpcode()) {
   default:
     break;
+  case TargetOpcode::G_OR:
+    return selectImpl(I, CoverageInfo);
   case TargetOpcode::G_ADD:
     return selectG_ADD(I);
   case TargetOpcode::G_CONSTANT:
@@ -429,3 +452,60 @@
   }
   return false;
 }
+
+/// Match \p Root as an immediate. If its register is defined by an integer or
+/// floating-point constant, render that value with addImm (FP constants are
+/// rendered as their raw bit pattern); otherwise return None.
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectImm(MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *BB = MI->getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  Optional<int64_t> C = getConstantVRegVal(Root.getReg(), MRI);
+  const ConstantFP *CFP = getConstantFPVRegVal(Root.getReg(), MRI);
+
+  if (C || CFP) {
+    int64_t Imm = C ? *C : CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+    return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(Imm); }
+    }};
+  }
+
+  return None;
+}
+
+/// Match a VSRC0 operand: use the immediate when selectImm succeeds, otherwise
+/// render \p Root unchanged. This matcher never returns None.
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {
+  auto Res = selectImm(Root);
+  if (Res != None)
+    return Res;
+
+  return {{
+    [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
+  }};
+}
+
+/// Match an SSRC operand: the immediate when selectImm succeeds, else \p Root
+/// itself if its register is assigned to the SGPR bank; otherwise None.
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSSRC(MachineOperand &Root) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *BB = MI->getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  auto Res = selectImm(Root);
+  if (Res != None)
+    return Res;
+
+  if (RBI.getRegBank(Root.getReg(), MRI, TRI)->getID() == AMDGPU::SGPRRegBankID)
+    return {{
+      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
+    }};
+
+  return None;
+}
Index: lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -381,7 +381,7 @@
 
   RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
   InstSelector.reset(new AMDGPUInstructionSelector(
-      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get())));
+      *this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
 }
 
 void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
Index: lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/CMakeLists.txt
+++ lib/Target/AMDGPU/CMakeLists.txt
@@ -15,6 +15,9 @@
 tablegen(LLVM AMDGPUGenSearchableTables.inc -gen-searchable-tables)
 tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
 
+set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td)
+tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel)
+
 add_public_tablegen_target(AMDGPUCommonTableGen)
 
 add_llvm_target(AMDGPUCodeGen
Index: lib/Target/AMDGPU/SIRegisterInfo.h
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.h
+++ lib/Target/AMDGPU/SIRegisterInfo.h
@@ -227,6 +227,9 @@
     // Not a callee saved register.
     return AMDGPU::SGPR30_SGPR31;
   }
+  const TargetRegisterClass *
+  getConstrainedRegClassForOperand(const MachineOperand &MO,
+                                   const MachineRegisterInfo &MRI) const override;
 
 private:
   void buildSpillLoadStore(MachineBasicBlock::iterator MI,
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SIRegisterInfo.h"
+#include "AMDGPURegisterBankInfo.h"
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
@@ -1562,3 +1563,26 @@
     return Empty;
   return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
 }
+
+/// Pick a register class for \p MO from its size and assigned register bank.
+/// Returns nullptr when no bank is assigned. Only 32- and 64-bit registers are
+/// handled; any bank other than VGPR gets the scalar (SReg) class.
+const TargetRegisterClass *
+SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
+                                         const MachineRegisterInfo &MRI) const {
+  unsigned Size = getRegSizeInBits(MO.getReg(), MRI);
+  const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
+  if (!RB)
+    return nullptr;
+
+  switch (Size) {
+  case 32:
+    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
+                                                  &AMDGPU::SReg_32_XM0RegClass;
+  case 64:
+    return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
+                                                   &AMDGPU::SReg_64_XEXECRegClass;
+  default:
+    llvm_unreachable("not implemented");
+  }
+}
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir
@@ -0,0 +1,78 @@
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define void @or(i32 addrspace(1)* %global0) {ret void}
+...
+---
+
+name:            or
+legalized:       true
+regBankSelected: true
+
+# GCN-LABEL: name: or
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
+    ; GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:vgpr(s32) = COPY $vgpr0
+    %3:vgpr(s64) = COPY $vgpr3_vgpr4
+    %4:sgpr(s32) = G_CONSTANT i32 1
+    %5:sgpr(s32) = G_CONSTANT i32 4096
+
+    ; or ss
+    ; GCN: [[SS:%[0-9]+]]:sreg_32 = S_OR_B32 [[SGPR0]], [[SGPR1]]
+    %6:sgpr(s32) = G_OR %0, %1
+
+    ; or si
+    ; GCN: [[SI:%[0-9]+]]:sreg_32 = S_OR_B32 [[SS]], 1
+    %7:sgpr(s32) = G_OR %6, %4
+
+    ; or is
+    ; GCN: [[IS:%[0-9]+]]:sreg_32 = S_OR_B32 1, [[SI]]
+    %8:sgpr(s32) = G_OR %4, %7
+
+    ; or sc
+    ; GCN: [[SC:%[0-9]+]]:sreg_32 = S_OR_B32 [[IS]], 4096
+    %9:sgpr(s32) = G_OR %8, %5
+
+    ; or cs
+    ; GCN: [[CS:%[0-9]+]]:sreg_32_xm0 = S_OR_B32 4096, [[SC]]
+    %10:sgpr(s32) = G_OR %5, %9
+
+    ; or vs
+    ; GCN: [[VS:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[CS]], [[VGPR0]]
+    %11:vgpr(s32) = G_OR %2, %10
+
+    ; or sv
+    ; GCN: [[SV:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[CS]], [[VS]]
+    %12:vgpr(s32) = G_OR %10, %11
+
+    ; or vv
+    ; GCN: [[VV:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[SV]], [[VGPR0]]
+    %13:vgpr(s32) = G_OR %12, %2
+
+    ; or iv
+    ; GCN: [[IV:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 1, [[VV]]
+    %14:vgpr(s32) = G_OR %4, %13
+
+    ; or vi
+    ; GCN: [[VI:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 1, [[IV]]
+    %15:vgpr(s32) = G_OR %14, %4
+
+    ; or cv
+    ; GCN: [[CV:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 4096, [[VI]]
+    %16:vgpr(s32) = G_OR %5, %15
+
+    ; or vc
+    ; GCN: [[VC:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 4096, [[CV]]
+    %17:vgpr(s32) = G_OR %16, %5
+
+
+    G_STORE %17, %3 :: (store 4 into %ir.global0)
+
+...
+---