Index: lib/Target/AMDGPU/AMDGPUCallLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -22,6 +22,13 @@
 class AMDGPUTargetLowering;
 
 class AMDGPUCallLowering: public CallLowering {
+
+  unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
+                             unsigned Offset) const;
+
+  void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
+                      unsigned Offset, unsigned DstReg) const;
+
  public:
   AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);
Index: lib/Target/AMDGPU/AMDGPUCallLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -14,8 +14,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUCallLowering.h"
+#include "AMDGPU.h"
 #include "AMDGPUISelLowering.h"
-
+#include "AMDGPUSubtarget.h"
+#include "SIISelLowering.h"
+#include "SIRegisterInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,12 +36,135 @@
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val, unsigned VReg) const {
+  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
   return true;
 }
 
+unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
+                                               Type *ParamTy,
+                                               unsigned Offset) const {
+
+  MachineFunction &MF = MIRBuilder.getMF();
+  const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const Function &F = *MF.getFunction();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
+  LLT PtrType(*PtrTy, DL);
+  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
+  unsigned KernArgSegmentPtr =
+      TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
+  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);
+
+  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+  MIRBuilder.buildConstant(OffsetReg, Offset);
+
+  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);
+
+  return DstReg;
+}
+
+void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
+                                        Type *ParamTy, unsigned Offset,
+                                        unsigned DstReg) const {
+  MachineFunction &MF = MIRBuilder.getMF();
+  const Function &F = *MF.getFunction();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
+  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
+  unsigned Align = DL.getABITypeAlignment(ParamTy);
+  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);
+
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
+                                       MachineMemOperand::MONonTemporal |
+                                       MachineMemOperand::MOInvariant,
+                              TypeSize, Align);
+
+  // FIXME: We need to handle sign/zero extend.
+  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
+}
+
 bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                               const Function &F,
                                               ArrayRef<unsigned> VRegs) const {
-  // TODO: Implement once there are generic loads/stores.
+
+  MachineFunction &MF = MIRBuilder.getMF();
+  const SISubtarget *Subtarget =
+      static_cast<const SISubtarget *>(&MF.getSubtarget());
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+  const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+
+  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
+  if (Info->hasPrivateSegmentBuffer()) {
+    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
+    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+    CCInfo.AllocateReg(PrivateSegmentBufferReg);
+  }
+
+  if (Info->hasDispatchPtr()) {
+    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+    // FIXME: Need to add reg as live-in.
+    CCInfo.AllocateReg(DispatchPtrReg);
+  }
+
+  if (Info->hasQueuePtr()) {
+    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
+    // FIXME: Need to add reg as live-in.
+    CCInfo.AllocateReg(QueuePtrReg);
+  }
+
+  if (Info->hasKernargSegmentPtr()) {
+    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+    const LLT P2 = LLT::pointer(2, 64);
+    unsigned VReg = MRI.createGenericVirtualRegister(P2);
+    MRI.addLiveIn(InputPtrReg, VReg);
+    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
+    MIRBuilder.buildCopy(VReg, InputPtrReg);
+    CCInfo.AllocateReg(InputPtrReg);
+  }
+
+  if (Info->hasDispatchID()) {
+    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
+    // FIXME: Need to add reg as live-in.
+    CCInfo.AllocateReg(DispatchIDReg);
+  }
+
+  if (Info->hasFlatScratchInit()) {
+    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
+    // FIXME: Need to add reg as live-in.
+    CCInfo.AllocateReg(FlatScratchInitReg);
+  }
+
+  unsigned NumArgs = F.arg_size();
+  Function::const_arg_iterator CurOrigArg = F.arg_begin();
+  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
+    MVT ValVT = TLI.getValueType(DL, CurOrigArg->getType()).getSimpleVT();
+    ISD::ArgFlagsTy Flags;
+    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
+    CCAssignFn *AssignFn =
+        TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
+    bool Res =
+        AssignFn(i, ValVT, ValVT, CCValAssign::Full, Flags, CCInfo);
+    assert(!Res && "Call operand has unhandled type");
+    (void)Res;
+  }
+
+  Function::const_arg_iterator Arg = F.arg_begin();
+  for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
+    // FIXME: We should be getting DebugInfo from the arguments somehow.
+    CCValAssign &VA = ArgLocs[i];
+    lowerParameter(MIRBuilder, Arg->getType(),
+                   VA.getLocMemOffset() +
+                   Subtarget->getExplicitKernelArgOffset(), VRegs[i]);
+  }
+
   return true;
 }
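Review note: each kernel argument ends up as a G_GEP off the kernarg segment pointer followed by a G_LOAD, at the CC-assigned offset plus the subtarget's explicit kernarg offset. A minimal standalone sketch of that offset arithmetic (the 36-byte non-HSA offset and the two 4-byte i32 arguments are illustrative assumptions, not values taken from this patch):

#include <cstdint>

// Hypothetical stand-in for Subtarget->getExplicitKernelArgOffset().
constexpr uint64_t ExplicitKernArgOffset = 36; // assumed non-HSA value

// Byte offset of the I-th of several 4-byte, 4-byte-aligned arguments,
// mirroring VA.getLocMemOffset() + ExplicitKernArgOffset above.
constexpr uint64_t kernArgByteOffset(unsigned I) {
  return ExplicitKernArgOffset + 4 * I;
}

static_assert(kernArgByteOffset(0) == 36, "first i32 argument");
static_assert(kernArgByteOffset(1) == 40, "second i32 argument");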
Index: lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -0,0 +1,67 @@
+//===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines all the static objects used by AMDGPURegisterBankInfo.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+namespace llvm {
+namespace AMDGPU {
+
+RegisterBank SGPRRegBank;
+RegisterBank VGPRRegBank;
+
+RegisterBank *RegBanks[] = {&SGPRRegBank, &VGPRRegBank};
+
+enum PartialMappingIdx {
+  None = -1,
+  PM_SGPR32 = 0,
+  PM_SGPR64 = 1,
+  PM_VGPR32 = 2,
+  PM_VGPR64 = 3
+};
+
+const RegisterBankInfo::PartialMapping PartMappings[] {
+  // StartIdx, Length, RegBank
+  {0, 32, SGPRRegBank},
+  {0, 64, SGPRRegBank},
+  {0, 32, VGPRRegBank},
+  {0, 64, VGPRRegBank}
+};
+
+const RegisterBankInfo::ValueMapping ValMappings[] {
+  // SGPR 32-bit
+  {&PartMappings[0], 1},
+  // SGPR 64-bit
+  {&PartMappings[1], 1},
+  // VGPR 32-bit
+  {&PartMappings[2], 1},
+  // VGPR 64-bit
+  {&PartMappings[3], 1}
+};
+
+enum ValueMappingIdx {
+  SGPRStartIdx = 0,
+  VGPRStartIdx = 2
+};
+
+const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
+                                                      unsigned Size) {
+  assert(Size % 32 == 0);
+  unsigned Idx = BankID == AMDGPU::SGPRRegBankID ? SGPRStartIdx : VGPRStartIdx;
+  Idx += (Size / 32) - 1;
+  return &ValMappings[Idx];
+}
+
+} // End AMDGPU namespace.
+} // End llvm namespace.
Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1141,8 +1141,8 @@
   SDLoc SL(ByteOffsetNode);
   AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration();
   int64_t ByteOffset = C->getSExtValue();
-  int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
-      ByteOffset >> 2 : ByteOffset;
+  int64_t EncodedOffset =
+      SIInstrInfo::getSMRDEncodedOffset(*Subtarget, ByteOffset);
 
   if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) {
     Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
Index: lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
 
+#include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
@@ -211,6 +212,8 @@
   /// type of implicit parameter.
   uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
                                       const ImplicitParameter Param) const;
+
+  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
 };
 
 namespace AMDGPUISD {
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -646,6 +646,11 @@
 // TargetLowering Callbacks
 //===---------------------------------------------------------------------===//
 
+CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+                                                    bool IsVarArg) const {
+  return CC_AMDGPU;
+}
+
 /// The SelectionDAGBuilder will automatically promote function arguments
 /// with illegal types. However, this does not work for the AMDGPU targets
 /// since the function arguments are stored in memory as these illegal types.
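Review note: the SMRD change centralizes the offset encoding rule that the selector below reuses: before Volcanic Islands the immediate is a dword offset (ByteOffset >> 2) in an 8-bit field; on VI and newer it is a byte offset in a 20-bit field. A standalone sketch re-deriving the boundaries exercised by the tests at the end of this patch (plain C++, not the LLVM API; the real helpers use isUInt<8>/isUInt<20>):

#include <cassert>
#include <cstdint>

enum Generation { SOUTHERN_ISLANDS, SEA_ISLANDS, VOLCANIC_ISLANDS };

// Mirrors SIInstrInfo::getSMRDEncodedOffset: pre-VI encodes dwords,
// VI+ encodes bytes.
int64_t encodeSMRDOffset(Generation Gen, int64_t ByteOffset) {
  return Gen < VOLCANIC_ISLANDS ? ByteOffset >> 2 : ByteOffset;
}

// Mirrors SIInstrInfo::isLegalSMRDImmOffset: 8-bit field pre-VI, 20-bit on VI+.
bool isLegalSMRDImmOffset(Generation Gen, int64_t ByteOffset) {
  int64_t Enc = encodeSMRDOffset(Gen, ByteOffset);
  return Gen < VOLCANIC_ISLANDS ? Enc >= 0 && Enc < (1 << 8)
                                : Enc >= 0 && Enc < (1 << 20);
}

int main() {
  assert(isLegalSMRDImmOffset(SOUTHERN_ISLANDS, 1020));     // 255 dwords: max on SI
  assert(!isLegalSMRDImmOffset(SOUTHERN_ISLANDS, 1024));    // 256 dwords: overflow
  assert(isLegalSMRDImmOffset(VOLCANIC_ISLANDS, 1048572));  // max 20-bit byte offset
  assert(!isLegalSMRDImmOffset(VOLCANIC_ISLANDS, 1048576)); // overflow on VI
}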
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -0,0 +1,61 @@
+//===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the InstructionSelector class for
+/// AMDGPU.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+
+class AMDGPUInstrInfo;
+class AMDGPURegisterBankInfo;
+class SIInstrInfo;
+class SIRegisterInfo;
+class SISubtarget;
+
+class AMDGPUInstructionSelector : public InstructionSelector {
+public:
+  AMDGPUInstructionSelector(const SISubtarget &STI,
+                            const AMDGPURegisterBankInfo &RBI);
+
+  bool select(MachineInstr &I) const override;
+
+  struct GEPInfo {
+    const MachineInstr &GEP;
+    SmallVector<unsigned, 2> SgprParts;
+    SmallVector<unsigned, 2> VgprParts;
+    int64_t Imm;
+    GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
+  };
+
+private:
+  MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
+  bool selectG_CONSTANT(MachineInstr &I) const;
+  bool selectG_ADD(MachineInstr &I) const;
+  bool selectG_GEP(MachineInstr &I) const;
+  bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
+  void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
+                       SmallVectorImpl<GEPInfo> &AddrInfo) const;
+  bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
+  bool selectG_LOAD(MachineInstr &I) const;
+  bool selectG_STORE(MachineInstr &I) const;
+
+  const SIInstrInfo &TII;
+  const SIRegisterInfo &TRI;
+  const AMDGPURegisterBankInfo &RBI;
+};
+
+} // End llvm namespace.
+#endif
Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -0,0 +1,406 @@
+//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstructionSelector.h"
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterBankInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "amdgpu-isel"
+
+using namespace llvm;
+
+AMDGPUInstructionSelector::AMDGPUInstructionSelector(
+    const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
+    : InstructionSelector(), TII(*STI.getInstrInfo()),
+      TRI(*STI.getRegisterInfo()),
+      RBI(RBI) {}
+
+MachineOperand
+AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
+                                           unsigned SubIdx) const {
+
+  MachineInstr *MI = MO.getParent();
+  MachineBasicBlock *BB = MO.getParent()->getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+
+  if (MO.isReg()) {
+    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
+    unsigned Reg = MO.getReg();
+    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
+        .addReg(Reg, 0, ComposedSubIdx);
+
+    return MachineOperand::CreateReg(DstReg, MO.isDef(), MO.isImplicit(),
+                                     MO.isKill(), MO.isDead(), MO.isUndef(),
+                                     MO.isEarlyClobber(), 0,
+                                     MO.isDebug(), MO.isInternalRead());
+  }
+
+  assert(MO.isImm());
+
+  APInt Imm(64, MO.getImm());
+
+  switch (SubIdx) {
+  default:
+    llvm_unreachable("do not know how to split immediate with this sub index.");
+  case AMDGPU::sub0:
+    return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
+  case AMDGPU::sub1:
+    return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
+  }
+}
+
+bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
+  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+  if (Size != 64)
+    return false;
+
+  DebugLoc DL = I.getDebugLoc();
+
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
+      .addOperand(getSubOperand64(I.getOperand(1), AMDGPU::sub0))
+      .addOperand(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
+
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
+      .addOperand(getSubOperand64(I.getOperand(1), AMDGPU::sub1))
+      .addOperand(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
+
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
+      .addReg(DstLo)
+      .addImm(AMDGPU::sub0)
+      .addReg(DstHi)
+      .addImm(AMDGPU::sub1);
+
+  for (MachineOperand &MO : I.explicit_operands()) {
+    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+      continue;
+    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
+  }
+
+  I.eraseFromParent();
+  return true;
+}
+
+bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
+  return selectG_ADD(I);
+}
+
+bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  DebugLoc DL = I.getDebugLoc();
+
+  // FIXME: Select the store instruction based on the address space.
+  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
+      .addOperand(I.getOperand(1))
+      .addOperand(I.getOperand(0))
+      .addImm(0)
+      .addImm(0)
+      .addImm(0);
+
+  // Now that we selected an opcode, we need to constrain the register
+  // operands to use appropriate classes.
+  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
+
+  I.eraseFromParent();
+  return Ret;
+}
+
+bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+
+  if (Size == 32) {
+    I.setDesc(TII.get(AMDGPU::S_MOV_B32));
+    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+  }
+
+  assert(Size == 64);
+
+  DebugLoc DL = I.getDebugLoc();
+  unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+  APInt Imm(64, I.getOperand(1).getImm());
+
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg)
+      .addImm(Imm.trunc(32).getZExtValue());
+
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg)
+      .addImm(Imm.ashr(32).getZExtValue());
+
+  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+      .addReg(LoReg)
+      .addImm(AMDGPU::sub0)
+      .addReg(HiReg)
+      .addImm(AMDGPU::sub1);
+
+  // We can't call constrainSelectedInstRegOperands here, because it doesn't
+  // work for target-independent opcodes.
+  I.eraseFromParent();
+  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, MRI);
+}
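Review note: selectG_CONSTANT splits a 64-bit immediate into two S_MOV_B32s and reassembles the halves with REG_SEQUENCE. A standalone sketch of the lo/hi split that the APInt trunc(32)/ashr(32) calls perform (the sample value is one of the offsets used in the SMRD tests below):

#include <cassert>
#include <cstdint>

// Low and high halves as selectG_CONSTANT extracts them.
uint32_t lo32(uint64_t Imm) { return static_cast<uint32_t>(Imm); }
uint32_t hi32(uint64_t Imm) { return static_cast<uint32_t>(Imm >> 32); }

int main() {
  // 17179869184 = 0x4'0000'0000; the tests expect K_LO = 0 and K_HI = 4.
  assert(lo32(17179869184ull) == 0);
  assert(hi32(17179869184ull) == 4);
}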
+
+static bool isConstant(const MachineInstr &MI) {
+  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
+}
+
+void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
+    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
+
+  const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());
+
+  assert(PtrMI);
+
+  if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
+    return;
+
+  GEPInfo GEPInfo(*PtrMI);
+
+  for (unsigned i = 1, e = 3; i < e; ++i) {
+    const MachineOperand &GEPOp = PtrMI->getOperand(i);
+    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
+    assert(OpDef);
+    if (isConstant(*OpDef)) {
+      // FIXME: Is it possible to have multiple Imm parts?  Maybe if we
+      // are lacking other optimizations.
+      assert(GEPInfo.Imm == 0);
+      GEPInfo.Imm = OpDef->getOperand(1).getImm();
+      continue;
+    }
+    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
+    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
+      GEPInfo.SgprParts.push_back(GEPOp.getReg());
+    else
+      GEPInfo.VgprParts.push_back(GEPOp.getReg());
+  }
+
+  AddrInfo.push_back(GEPInfo);
+  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
+}
+
+static bool isInstrUniform(const MachineInstr &MI) {
+  if (!MI.hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand *MMO = *MI.memoperands_begin();
+  const Value *Ptr = MMO->getValue();
+
+  // UndefValue means this is a load of a kernel input.  These are uniform.
+  // Sometimes LDS instructions have constant pointers.
+  // If Ptr is null, then that means this mem operand contains a
+  // PseudoSourceValue like GOT.
+  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
+    return true;
+
+  const Instruction *I = dyn_cast<Instruction>(Ptr);
+  return I && I->getMetadata("amdgpu.uniform");
+}
+
+static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
+
+  if (LoadSize == 32)
+    return BaseOpcode;
+
+  switch (BaseOpcode) {
+  case AMDGPU::S_LOAD_DWORD_IMM:
+    switch (LoadSize) {
+    case 64:  return AMDGPU::S_LOAD_DWORDX2_IMM;
+    case 128: return AMDGPU::S_LOAD_DWORDX4_IMM;
+    case 256: return AMDGPU::S_LOAD_DWORDX8_IMM;
+    case 512: return AMDGPU::S_LOAD_DWORDX16_IMM;
+    }
+    break;
+  case AMDGPU::S_LOAD_DWORD_IMM_ci:
+    switch (LoadSize) {
+    case 64:  return AMDGPU::S_LOAD_DWORDX2_IMM_ci;
+    case 128: return AMDGPU::S_LOAD_DWORDX4_IMM_ci;
+    case 256: return AMDGPU::S_LOAD_DWORDX8_IMM_ci;
+    case 512: return AMDGPU::S_LOAD_DWORDX16_IMM_ci;
+    }
+    break;
+  case AMDGPU::S_LOAD_DWORD_SGPR:
+    switch (LoadSize) {
+    case 64:  return AMDGPU::S_LOAD_DWORDX2_SGPR;
+    case 128: return AMDGPU::S_LOAD_DWORDX4_SGPR;
+    case 256: return AMDGPU::S_LOAD_DWORDX8_SGPR;
+    case 512: return AMDGPU::S_LOAD_DWORDX16_SGPR;
+    }
+    break;
+  }
+  llvm_unreachable("Invalid base smrd opcode or size");
+}
+
+bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
+  for (const GEPInfo &GEPInfo : AddrInfo) {
+    if (!GEPInfo.VgprParts.empty())
+      return true;
+  }
+  return false;
+}
+
+bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
+                                           ArrayRef<GEPInfo> AddrInfo) const {
+
+  if (!I.hasOneMemOperand())
+    return false;
+
+  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+    return false;
+
+  if (!isInstrUniform(I))
+    return false;
+
+  if (hasVgprParts(AddrInfo))
+    return false;
+
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  unsigned DstReg = I.getOperand(0).getReg();
+  const DebugLoc &DL = I.getDebugLoc();
+  unsigned Opcode;
+  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+
+  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {
+
+    const GEPInfo &GEPInfo = AddrInfo[0];
+
+    unsigned PtrReg = GEPInfo.SgprParts[0];
+    int64_t EncodedImm = SIInstrInfo::getSMRDEncodedOffset(Subtarget,
+                                                           GEPInfo.Imm);
+    if (SIInstrInfo::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
+      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
+
+      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+                               .addReg(PtrReg)
+                               .addImm(EncodedImm)
+                               .addImm(0); // glc
+      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+    }
+
+    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
+        isUInt<32>(EncodedImm)) {
+      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
+      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+                               .addReg(PtrReg)
+                               .addImm(EncodedImm)
+                               .addImm(0); // glc
+      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+    }
+
+    if (isUInt<32>(GEPInfo.Imm)) {
+      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
+      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
+          .addImm(GEPInfo.Imm);
+
+      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+                               .addReg(PtrReg)
+                               .addReg(OffsetReg)
+                               .addImm(0); // glc
+      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+    }
+  }
+
+  unsigned PtrReg = I.getOperand(1).getReg();
+  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
+  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
+                           .addReg(PtrReg)
+                           .addImm(0)
+                           .addImm(0); // glc
+  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
+}
+
+bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  DebugLoc DL = I.getDebugLoc();
+  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned PtrReg = I.getOperand(1).getReg();
+  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+  unsigned Opcode;
+
+  SmallVector<GEPInfo, 4> AddrInfo;
+
+  getAddrModeInfo(I, MRI, AddrInfo);
+
+  if (selectSMRD(I, AddrInfo)) {
+    I.eraseFromParent();
+    return true;
+  }
+
+  switch (LoadSize) {
+  default:
+    llvm_unreachable("Load size not supported");
+  case 32:
+    Opcode = AMDGPU::FLAT_LOAD_DWORD;
+    break;
+  case 64:
+    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
+    break;
+  }
+
+  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
+                           .addOperand(I.getOperand(0))
+                           .addReg(PtrReg)
+                           .addImm(0)
+                           .addImm(0)
+                           .addImm(0);
+
+  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
+  I.eraseFromParent();
+  return Ret;
+}
+
+bool AMDGPUInstructionSelector::select(MachineInstr &I) const {
+
+  if (!isPreISelGenericOpcode(I.getOpcode()))
+    return true;
+
+  switch (I.getOpcode()) {
+  default:
+    break;
+  case TargetOpcode::G_ADD:
+    return selectG_ADD(I);
+  case TargetOpcode::G_CONSTANT:
+    return selectG_CONSTANT(I);
+  case TargetOpcode::G_GEP:
+    return selectG_GEP(I);
+  case TargetOpcode::G_LOAD:
+    return selectG_LOAD(I);
+  case TargetOpcode::G_STORE:
+    return selectG_STORE(I);
+  }
+  return false;
+}
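Review note: selectG_ADD lowers a 64-bit scalar add as S_ADD_U32 on the low halves (carry out in SCC) followed by S_ADDC_U32 on the high halves (carry in from SCC). A standalone sketch of that arithmetic, assuming nothing beyond the carry semantics just described:

#include <cassert>
#include <cstdint>

// Models S_ADD_U32 (low halves, carry out) + S_ADDC_U32 (high halves + carry).
uint64_t add64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t ALo = static_cast<uint32_t>(A), AHi = static_cast<uint32_t>(A >> 32);
  uint32_t BLo = static_cast<uint32_t>(B), BHi = static_cast<uint32_t>(B >> 32);

  uint32_t Lo = ALo + BLo;
  uint32_t SCC = Lo < ALo;       // carry out of the low add
  uint32_t Hi = AHi + BHi + SCC; // S_ADDC_U32 consumes SCC

  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  assert(add64ViaHalves(0xffffffffull, 1) == 0x100000000ull); // carry ripples
  assert(add64ViaHalves(0x123456789aull, 0x0f00000066ull) ==
         0x123456789aull + 0x0f00000066ull);
}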
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.h
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -0,0 +1,30 @@
+//===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class LLVMContext;
+
+/// This class provides the information for legalizing operations on the
+/// target.
+class AMDGPULegalizerInfo : public LegalizerInfo {
+public:
+  AMDGPULegalizerInfo();
+};
+} // End llvm namespace.
+#endif
Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -0,0 +1,57 @@
+//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPULegalizerInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
+  using namespace TargetOpcode;
+
+  const LLT S32 = LLT::scalar(32);
+  const LLT S64 = LLT::scalar(64);
+  const LLT P1 = LLT::pointer(1, 64);
+  const LLT P2 = LLT::pointer(2, 64);
+
+  setAction({G_CONSTANT, S64}, Legal);
+
+  setAction({G_GEP, P1}, Legal);
+  setAction({G_GEP, 1, P1}, Legal);
+  setAction({G_GEP, P2}, Legal);
+  setAction({G_GEP, 1, P2}, Legal);
+  setAction({G_GEP, 1, S64}, Legal);
+  setAction({G_GEP, 2, S64}, Legal);
+
+  setAction({G_LOAD, P1}, Legal);
+  setAction({G_LOAD, P2}, Legal);
+  setAction({G_LOAD, S32}, Legal);
+  setAction({G_LOAD, 1, S64}, Legal);
+  setAction({G_LOAD, 1, P1}, Legal);
+  setAction({G_LOAD, 1, P2}, Legal);
+
+  setAction({G_STORE, S32}, Legal);
+  setAction({G_STORE, 1, S64}, Legal);
+  setAction({G_STORE, 1, P1}, Legal);
+
+  computeTables();
+}
Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -0,0 +1,75 @@
+//===- AMDGPURegisterBankInfo -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+
+namespace llvm {
+
+class SIRegisterInfo;
+class TargetRegisterInfo;
+
+namespace AMDGPU {
+enum {
+  SGPRRegBankID = 0,
+  VGPRRegBankID = 1,
+  NumRegisterBanks
+};
+} // End AMDGPU namespace.
+
+/// This class provides the information for the target register banks.
+class AMDGPURegisterBankInfo : public RegisterBankInfo {
+  const SIRegisterInfo *TRI;
+
+  /// See RegisterBankInfo::applyMapping.
+  void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+
+  RegisterBankInfo::InstructionMapping
+  getInstrMappingForLoad(const MachineInstr &MI) const;
+
+public:
+  AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
+
+  /// Get the cost of a copy from \p B to \p A, or put differently,
+  /// get the cost of A = COPY B. Since register banks may cover
+  /// different sizes, \p Size specifies what will be the size in bits
+  /// that will be copied around.
+  ///
+  /// \note Since this is a copy, both registers have the same size.
+  unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
+                    unsigned Size) const override;
+
+  /// Get a register bank that covers \p RC.
+  ///
+  /// \pre \p RC is a user-defined register class (as opposed to one
+  /// generated by TableGen).
+  ///
+  /// \note The mapping RC -> RegBank could be built while adding the
+  /// coverage for the register banks. However, we do not do it, because,
+  /// at least for now, we only need this information for register classes
+  /// that are used in the description of instructions. In other words,
+  /// there are just a handful of them and we do not want to waste space.
+  ///
+  /// \todo This should be TableGen'ed.
+  const RegisterBank &
+  getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+
+  /// Get the alternative mappings for \p MI, i.e. mappings other than the
+  /// one returned by getInstrMapping.
+  InstructionMappings
+  getInstrAlternativeMappings(const MachineInstr &MI) const override;
+
+  InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+};
+} // End llvm namespace.
+#endif
Index: lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -0,0 +1,236 @@
+//===- AMDGPURegisterBankInfo.cpp -------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for
+/// AMDGPU.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterBankInfo.h"
+#include "AMDGPUInstrInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+// This file will be TableGen'ed at some point.
+#include "AMDGPUGenRegisterBankInfo.def" + +using namespace llvm; + +#ifndef LLVM_BUILD_GLOBAL_ISEL +#error "You shouldn't build this" +#endif + +AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) + : RegisterBankInfo(AMDGPU::RegBanks, AMDGPU::NumRegisterBanks), + TRI(static_cast(&TRI)) { + + // HACK: Until this is fully tablegen'd + static bool AlreadyInit = false; + if (AlreadyInit) + return; + + AlreadyInit = true; + + createRegisterBank(AMDGPU::SGPRRegBankID, "SGPR"); + addRegBankCoverage(AMDGPU::SGPRRegBankID, AMDGPU::SGPR_32RegClassID, TRI); + addRegBankCoverage(AMDGPU::SGPRRegBankID, AMDGPU::SReg_64RegClassID, TRI); + addRegBankCoverage(AMDGPU::SGPRRegBankID, AMDGPU::SGPR_64RegClassID, TRI); + const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID); + assert(RBSGPR.getSize() == 64); + assert(&RBSGPR == &AMDGPU::SGPRRegBank); + + createRegisterBank(AMDGPU::VGPRRegBankID, "VGPR"); + addRegBankCoverage(AMDGPU::VGPRRegBankID, AMDGPU::VGPR_32RegClassID, TRI); + addRegBankCoverage(AMDGPU::VGPRRegBankID, AMDGPU::VReg_64RegClassID, TRI); + addRegBankCoverage(AMDGPU::VGPRRegBankID, AMDGPU::VReg_96RegClassID, TRI); + const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID); + assert(RBVGPR.getSize() == 96); + assert(&RBVGPR == &AMDGPU::VGPRRegBank); + +} + +unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &A, + const RegisterBank &B, + unsigned Size) const { + return RegisterBankInfo::copyCost(A, B, Size); +} + +const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass( + const TargetRegisterClass &RC) const { + + if (TRI->isSGPRClass(&RC)) + return getRegBank(AMDGPU::SGPRRegBankID); + + return getRegBank(AMDGPU::VGPRRegBankID); +} + +RegisterBankInfo::InstructionMappings +AMDGPURegisterBankInfo::getInstrAlternativeMappings( + const MachineInstr &MI) const { + + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + + InstructionMappings AltMappings; + switch (MI.getOpcode()) { + case TargetOpcode::G_LOAD: { + // FIXME: Should we be hard coding the size for these mappings? + InstructionMapping SSMapping(1, 1, + getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), + 2); // Num Operands + AltMappings.emplace_back(std::move(SSMapping)); + + InstructionMapping VVMapping(2, 1, + getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), + 2); // Num Operands + AltMappings.emplace_back(std::move(VVMapping)); + + // FIXME: Should this be the pointer-size (64-bits) or the size of the + // register that will hold the bufffer resourc (128-bits). 
+    InstructionMapping VSMapping(3, 1,
+        getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+                            AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+        2); // Num Operands
+    AltMappings.emplace_back(std::move(VSMapping));
+
+    return AltMappings;
+  }
+  default:
+    break;
+  }
+  return RegisterBankInfo::getInstrAlternativeMappings(MI);
+}
+
+void AMDGPURegisterBankInfo::applyMappingImpl(
+    const OperandsMapper &OpdMapper) const {
+  return applyDefaultMapping(OpdMapper);
+}
+
+static bool isInstrUniform(const MachineInstr &MI) {
+  if (!MI.hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand *MMO = *MI.memoperands_begin();
+  return AMDGPU::isUniformMMO(MMO);
+}
+
+RegisterBankInfo::InstructionMapping
+AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
+
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  RegisterBankInfo::InstructionMapping Mapping =
+      InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
+  SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());
+  unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+
+  const ValueMapping *ValMapping;
+  const ValueMapping *PtrMapping;
+
+  if (isInstrUniform(MI)) {
+    // We have a uniform instruction so we want to use an SMRD load.
+    ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+    // FIXME: Don't hard code pointer size.
+    PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
+  } else {
+    ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+    // FIXME: Don't hard code pointer size.
+    // FIXME: What would happen if we used SGPRRegBankID here?
+    PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
+  }
+
+  OpdsMapping[0] = ValMapping;
+  OpdsMapping[1] = PtrMapping;
+  Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+  return Mapping;
+
+  // FIXME: Do we want to add a mapping for FLAT load, or should we just
+  // handle that during instruction selection?
+}
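Review note: the load mapping is driven entirely by divergence: uniform loads get SGPR banks (an SMRD-style load), divergent ones get VGPR banks (a FLAT-style load). A standalone sketch of that decision table (enum and struct names are illustrative, not the LLVM ones):

#include <cassert>

enum Bank { SGPR, VGPR };

struct LoadMapping { Bank Value; Bank Ptr; };

// Mirrors getInstrMappingForLoad: one bank pair per divergence state.
LoadMapping mapLoad(bool IsUniform) {
  return IsUniform ? LoadMapping{SGPR, SGPR} : LoadMapping{VGPR, VGPR};
}

int main() {
  assert(mapLoad(true).Value == SGPR); // uniform -> SMRD-style load
  assert(mapLoad(false).Ptr == VGPR);  // divergent -> FLAT-style load
}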
+
+RegisterBankInfo::InstructionMapping
+AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+  RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+
+  if (Mapping.isValid())
+    return Mapping;
+
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
+  SmallVector<const ValueMapping *, 8> OpdsMapping(MI.getNumOperands());
+
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case AMDGPU::G_CONSTANT: {
+    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+    return Mapping;
+  }
+  case AMDGPU::G_GEP: {
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      if (!MI.getOperand(i).isReg())
+        continue;
+
+      unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
+      OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+    }
+    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+    return Mapping;
+  }
+  case AMDGPU::G_STORE: {
+    assert(MI.getOperand(0).isReg());
+    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+    // FIXME: We need to specify a different reg bank once scalar stores
+    // are supported.
+    const ValueMapping *ValMapping =
+        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+    // FIXME: Depending on the type of store, the pointer could be in
+    // the SGPR reg bank.
+    // FIXME: Pointer size should be based on the address space.
+    const ValueMapping *PtrMapping =
+        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
+
+    OpdsMapping[0] = ValMapping;
+    OpdsMapping[1] = PtrMapping;
+    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+    return Mapping;
+  }
+
+  case AMDGPU::G_LOAD:
+    return getInstrMappingForLoad(MI);
+  }
+
+  unsigned BankID = AMDGPU::SGPRRegBankID;
+
+  Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
+  unsigned Size = 0;
+  for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
+    // If the operand is not a register, default to the size of the previous
+    // operand.
+    // FIXME: Can't we pull the types from the MachineInstr rather than the
+    // operands?
+    if (MI.getOperand(Idx).isReg())
+      Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
+    OpdsMapping[Idx] = AMDGPU::getValueMapping(BankID, Size);
+  }
+  Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+
+  return Mapping;
+}
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -496,6 +496,21 @@
     return GISel->getCallLowering();
   }
 
+  const InstructionSelector *getInstructionSelector() const override {
+    assert(GISel && "Access to GlobalISel APIs not set");
+    return GISel->getInstructionSelector();
+  }
+
+  const LegalizerInfo *getLegalizerInfo() const {
+    assert(GISel && "Access to GlobalISel APIs not set");
+    return GISel->getLegalizerInfo();
+  }
+
+  const RegisterBankInfo *getRegBankInfo() const override {
+    assert(GISel && "Access to GlobalISel APIs not set");
+    return GISel->getRegBankInfo();
+  }
+
   const SIRegisterInfo *getRegisterInfo() const override {
     return &InstrInfo.getRegisterInfo();
   }
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -16,6 +16,9 @@
 #include "AMDGPUTargetMachine.h"
 #include "AMDGPU.h"
 #include "AMDGPUCallLowering.h"
+#include "AMDGPUInstructionSelector.h"
+#include "AMDGPULegalizerInfo.h"
+#include "AMDGPURegisterBankInfo.h"
 #include "AMDGPUTargetObjectFile.h"
 #include "AMDGPUTargetTransformInfo.h"
 #include "GCNSchedStrategy.h"
@@ -25,7 +28,12 @@
 #include "SIISelLowering.h"
 #include "SIInstrInfo.h"
 #include "SIMachineScheduler.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -221,9 +229,21 @@
 namespace {
 struct SIGISelActualAccessor : public GISelAccessor {
   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+  std::unique_ptr<InstructionSelector> InstSelector;
+  std::unique_ptr<LegalizerInfo> Legalizer;
+  std::unique_ptr<RegisterBankInfo> RegBankInfo;
   const AMDGPUCallLowering *getCallLowering() const override {
     return CallLoweringInfo.get();
   }
+  const InstructionSelector *getInstructionSelector() const override {
+    return InstSelector.get();
+  }
+  const class LegalizerInfo *getLegalizerInfo() const override {
+    return Legalizer.get();
+  }
+  const RegisterBankInfo *getRegBankInfo() const override {
+    return RegBankInfo.get();
+  }
 };
 } // End anonymous namespace.
 #endif
@@ -256,6 +276,11 @@
   SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
   GISel->CallLoweringInfo.reset(
       new AMDGPUCallLowering(*I->getTargetLowering()));
+  GISel->Legalizer.reset(new AMDGPULegalizerInfo());
+
+  GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
+  GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
+      *static_cast<AMDGPURegisterBankInfo *>(GISel->RegBankInfo.get())));
 #endif
   I->setGISelAccessor(*GISel);
@@ -546,16 +571,20 @@
 }
 
 bool GCNPassConfig::addLegalizeMachineIR() {
+  addPass(new Legalizer());
   return false;
 }
 
 bool GCNPassConfig::addRegBankSelect() {
+  addPass(new RegBankSelect());
   return false;
 }
 
 bool GCNPassConfig::addGlobalInstructionSelect() {
+  addPass(new InstructionSelect());
   return false;
 }
+
 #endif
 
 void GCNPassConfig::addPreRegAlloc() {
Index: lib/Target/AMDGPU/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/CMakeLists.txt
+++ lib/Target/AMDGPU/CMakeLists.txt
@@ -39,14 +39,17 @@
   AMDGPUTargetObjectFile.cpp
   AMDGPUIntrinsicInfo.cpp
   AMDGPUISelDAGToDAG.cpp
+  AMDGPULegalizerInfo.cpp
   AMDGPUMCInstLower.cpp
   AMDGPUMachineFunction.cpp
   AMDGPUOpenCLImageTypeLoweringPass.cpp
+  AMDGPURegisterBankInfo.cpp
   AMDGPUSubtarget.cpp
   AMDGPUTargetMachine.cpp
   AMDGPUTargetTransformInfo.cpp
   AMDGPUISelLowering.cpp
   AMDGPUInstrInfo.cpp
+  AMDGPUInstructionSelector.cpp
   AMDGPUPromoteAlloca.cpp
   AMDGPURegisterInfo.cpp
   GCNHazardRecognizer.cpp
Index: lib/Target/AMDGPU/LLVMBuild.txt
===================================================================
--- lib/Target/AMDGPU/LLVMBuild.txt
+++ lib/Target/AMDGPU/LLVMBuild.txt
@@ -30,5 +30,5 @@
 type = Library
 name = AMDGPUCodeGen
 parent = AMDGPU
-required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel
 add_to_library_groups = AMDGPU
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -611,18 +611,8 @@
 bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
   const MemSDNode *MemNode = cast<MemSDNode>(N);
-  const Value *Ptr = MemNode->getMemOperand()->getValue();
-
-  // UndefValue means this is a load of a kernel input. These are uniform.
-  // Sometimes LDS instructions have constant pointers.
-  // If Ptr is null, then that means this mem operand contains a
-  // PseudoSourceValue like GOT.
-  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
-      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
-    return true;
-
-  const Instruction *I = dyn_cast<Instruction>(Ptr);
-  return I && I->getMetadata("amdgpu.uniform");
+  return AMDGPU::isUniformMMO(MemNode->getMemOperand());
 }
 
 TargetLoweringBase::LegalizeTypeAction
Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -424,6 +424,12 @@
     return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
   }
 
+  static int64_t getSMRDEncodedOffset(const AMDGPUSubtarget &ST,
+                                      int64_t ByteOffset);
+
+  static bool isLegalSMRDImmOffset(const AMDGPUSubtarget &ST,
+                                   int64_t ByteOffset);
+
   bool sopkIsZext(uint16_t Opcode) const {
     return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
   }
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2529,6 +2529,21 @@
   return DstReg;
 }
 
+int64_t SIInstrInfo::getSMRDEncodedOffset(const AMDGPUSubtarget &ST,
+                                          int64_t ByteOffset) {
+  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+    return ByteOffset >> 2;
+
+  return ByteOffset;
+}
+
+bool SIInstrInfo::isLegalSMRDImmOffset(const AMDGPUSubtarget &ST,
+                                       int64_t ByteOffset) {
+  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
+  return ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ?
+      isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset);
+}
+
 void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
                                        MachineInstr &MI) const {
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -22,6 +22,7 @@
 class FeatureBitset;
 class Function;
 class GlobalValue;
+class MachineMemOperand;
 class MCContext;
 class MCInstrDesc;
 class MCRegisterClass;
@@ -171,6 +172,8 @@
 bool isInlinableLiteral64(int64_t Literal, bool IsVI);
 bool isInlinableLiteral32(int32_t Literal, bool IsVI);
 
+bool isUniformMMO(const MachineMemOperand *MMO);
+
 } // end namespace AMDGPU
 } // end namespace llvm
Index: lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -9,6 +9,8 @@
 #include "AMDGPUBaseInfo.h"
 #include "AMDGPU.h"
 #include "SIDefines.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
@@ -428,6 +430,19 @@
   return false;
 }
 
+bool isUniformMMO(const MachineMemOperand *MMO) {
+  const Value *Ptr = MMO->getValue();
+  // UndefValue means this is a load of a kernel input. These are uniform.
+  // Sometimes LDS instructions have constant pointers.
+  // If Ptr is null, then that means this mem operand contains a
+  // PseudoSourceValue like GOT.
+  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
+    return true;
+
+  const Instruction *I = dyn_cast<Instruction>(Ptr);
+  return I && I->getMetadata("amdgpu.uniform");
+}
 } // End namespace AMDGPU
 } // End namespace llvm
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -0,0 +1,29 @@
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
+...
+---
+
+name: global_addrspace
+legalized: true
+regBankSelected: true
+
+registers:
+  - { id: 0, class: vgpr }
+  - { id: 1, class: vgpr }
+
+# GCN: global_addrspace
+# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
+# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0
+
+body: |
+  bb.0:
+    liveins: %vgpr0_vgpr1
+
+    %0(p1) = COPY %vgpr0_vgpr1
+    %1(s32) = G_LOAD %0 :: (load 4 from %ir.global0)
+
+...
+---
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
@@ -0,0 +1,171 @@
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
+
+--- |
+  define void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
+...
+---
+
+name: smrd_imm
+legalized: true
+regBankSelected: true
+
+registers:
+  - { id: 0, class: sgpr }
+  - { id: 1, class: sgpr }
+  - { id: 2, class: sgpr }
+  - { id: 3, class: sgpr }
+  - { id: 4, class: sgpr }
+  - { id: 5, class: sgpr }
+  - { id: 6, class: sgpr }
+  - { id: 7, class: sgpr }
+  - { id: 8, class: sgpr }
+  - { id: 9, class: sgpr }
+  - { id: 10, class: sgpr }
+  - { id: 11, class: sgpr }
+  - { id: 12, class: sgpr }
+  - { id: 13, class: sgpr }
+  - { id: 14, class: sgpr }
+  - { id: 15, class: sgpr }
+  - { id: 16, class: sgpr }
+  - { id: 17, class: sgpr }
+  - { id: 18, class: sgpr }
+  - { id: 19, class: sgpr }
+  - { id: 20, class: sgpr }
+  - { id: 21, class: sgpr }
+  - { id: 22, class: sgpr }
+  - { id: 23, class: sgpr }
+  - { id: 24, class: sgpr }
+  - { id: 25, class: sgpr }
+  - { id: 26, class: sgpr }
+  - { id: 27, class: sgpr }
+
+# GCN: body:
+# GCN: [[PTR:%[0-9]+]] = COPY %sgpr0_sgpr1
+
+# Immediate offset:
+# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
+# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0
+
+# Max immediate offset for SI
+# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
+# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0
+
+# Immediate overflow for SI
+# FIXME: The immediate gets selected twice, once into the
+# S_LOAD_DWORD instruction and once just as a normal constant.
+# SI: S_MOV_B32 1024
+# SI: [[K1024:%[0-9]+]] = S_MOV_B32 1024
+# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
+# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
+# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0
+
+# Max immediate offset for VI
+# SI: S_MOV_B32 1048572
+# SI: [[K1048572:%[0-9]+]] = S_MOV_B32 1048572
+# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
+# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572
+
+#
+# Immediate overflow for VI
+# FIXME: The immediate gets selected twice, once into the
+# S_LOAD_DWORD instruction and once just as a normal constant.
+# SIVI: S_MOV_B32 1048576
+# SIVI: [[K1048576:%[0-9]+]] = S_MOV_B32 1048576
+# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
+# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0
+
+# Max immediate for CI
+# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 4294967292
+# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 3
+# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
+# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
+# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
+# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
+# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
+# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
+# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
+# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
+# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
+# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0
+
+# Immediate overflow for CI
+# GCN: [[K_LO:%[0-9]+]] = S_MOV_B32 0
+# GCN: [[K_HI:%[0-9]+]] = S_MOV_B32 4
+# GCN: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
+# GCN: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
+# GCN: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
+# GCN: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
+# GCN: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
+# GCN: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
+# GCN: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
+# GCN: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
+# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
+
+# Max 32-bit byte offset
+# FIXME: The immediate gets selected twice, once into the
+# S_LOAD_DWORD instruction and once just as a normal constant.
+# SIVI: S_MOV_B32 4294967292
+# SIVI: [[K4294967292:%[0-9]+]] = S_MOV_B32 4294967292
+# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
+# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0
+
+# Overflow 32-bit byte offset
+# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 0
+# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 1
+# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
+# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
+# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
+# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
+# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
+# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
+# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
+# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
+# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
+# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0
+
+body: |
+  bb.0:
+    liveins: %sgpr0_sgpr1
+
+    %0(p2) = COPY %sgpr0_sgpr1
+
+    %1(s64) = G_CONSTANT 4
+    %2(p2) = G_GEP %0, %1
+    %3(s32) = G_LOAD %2 :: (load 4 from %ir.const0)
+
+    %4(s64) = G_CONSTANT 1020
+    %5(p2) = G_GEP %0, %4
+    %6(s32) = G_LOAD %5 :: (load 4 from %ir.const0)
+
+    %7(s64) = G_CONSTANT 1024
+    %8(p2) = G_GEP %0, %7
+    %9(s32) = G_LOAD %8 :: (load 4 from %ir.const0)
+
+    %10(s64) = G_CONSTANT 1048572
+    %11(p2) = G_GEP %0, %10
+    %12(s32) = G_LOAD %11 :: (load 4 from %ir.const0)
+
+    %13(s64) = G_CONSTANT 1048576
+    %14(p2) = G_GEP %0, %13
+    %15(s32) = G_LOAD %14 :: (load 4 from %ir.const0)
+
+    %16(s64) = G_CONSTANT 17179869180
+    %17(p2) = G_GEP %0, %16
+    %18(s32) = G_LOAD %17 :: (load 4 from %ir.const0)
+
+    %19(s64) = G_CONSTANT 17179869184
+    %20(p2) = G_GEP %0, %19
+    %21(s32) = G_LOAD %20 :: (load 4 from %ir.const0)
+
+    %22(s64) = G_CONSTANT 4294967292
+    %23(p2) = G_GEP %0, %22
+    %24(s32) = G_LOAD %23 :: (load 4 from %ir.const0)
+
+    %25(s64) = G_CONSTANT 4294967296
+    %26(p2) = G_GEP %0, %25
+    %27(s32) = G_LOAD %26 :: (load 4 from %ir.const0)
+
+...
+---
Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -0,0 +1,31 @@
+# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
+...
+---
+
+name: global_addrspace
+legalized: true
+regBankSelected: true
+
+registers:
+  - { id: 0, class: vgpr }
+  - { id: 1, class: vgpr }
+
+# GCN: global_addrspace
+# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
+# GCN: [[VAL:%[0-9]+]] = COPY %vgpr2
+# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0
+
+body: |
+  bb.0:
+    liveins: %vgpr0_vgpr1, %vgpr2
+
+    %0(p1) = COPY %vgpr0_vgpr1
+    %1(s32) = COPY %vgpr2
+    G_STORE %1, %0 :: (store 4 into %ir.global0)
+
+...
+---
Index: test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=GCN %s
+
+; GCN-LABEL: vs_epilog
+; GCN: s_endpgm
+
+define amdgpu_vs void @vs_epilog() {
+main_body:
+  ret void
+}
Index: test/CodeGen/AMDGPU/GlobalISel/smrd.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/GlobalISel/smrd.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
+; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
+
+; SMRD load with an immediate offset.
+; GCN-LABEL: {{^}}smrd0:
+; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
+define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; SMRD load with the largest possible immediate offset.
+; GCN-LABEL: {{^}}smrd1:
+; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
+define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; SMRD load with an offset greater than the largest possible immediate.
+; GCN-LABEL: {{^}}smrd2:
+; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
+; GCN: s_endpgm
+define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; SMRD load with a 64-bit offset
+; GCN-LABEL: {{^}}smrd3:
+; FIXME: There are too many copies here because we don't fold immediates
+; through REG_SEQUENCE
+; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
+; TODO: Add VI checks
+; XGCN: s_endpgm
+define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; SMRD load with the largest possible immediate offset on VI
+; GCN-LABEL: {{^}}smrd4:
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
+define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; SMRD load with an offset greater than the largest possible immediate on VI
+; GCN-LABEL: {{^}}smrd5:
+; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
+; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
+; GCN: s_endpgm
+define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
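Review note: the smrd tests probe the encoding boundaries from the SIInstrInfo change; each byte offset is the getelementptr dword index times 4. A standalone check of that arithmetic (plain C++, nothing LLVM-specific):

#include <cstdint>

// Byte offsets exercised by smrd0-smrd5: GEP index * 4 bytes per i32.
static_assert(255 * 4 == 1020, "max 8-bit dword offset on SI");
static_assert(256 * 4 == 1024, "first offset needing an SGPR offset on SI");
static_assert(262143 * 4 == 1048572, "max 20-bit byte offset on VI (0xffffc)");
static_assert(262144 * 4 == 1048576, "first offset overflowing VI's field");

int main() { return 0; }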