Diff 86301

llvm/trunk/lib/Target/AMDGPU/AMDGPU.td

	Show First 20 Lines • Show All 556 Lines • ▼ Show 20 Lines

	// Include AMDGPU TD files			// Include AMDGPU TD files
	include "R600Schedule.td"			include "R600Schedule.td"
	include "SISchedule.td"			include "SISchedule.td"
	include "Processors.td"			include "Processors.td"
	include "AMDGPUInstrInfo.td"			include "AMDGPUInstrInfo.td"
	include "AMDGPUIntrinsics.td"			include "AMDGPUIntrinsics.td"
	include "AMDGPURegisterInfo.td"			include "AMDGPURegisterInfo.td"
				include "AMDGPURegisterBanks.td"
	include "AMDGPUInstructions.td"			include "AMDGPUInstructions.td"
	include "AMDGPUCallingConv.td"			include "AMDGPUCallingConv.td"

llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h

	Show All 16 Lines

	#include "llvm/CodeGen/GlobalISel/CallLowering.h"			#include "llvm/CodeGen/GlobalISel/CallLowering.h"

	namespace llvm {			namespace llvm {

	class AMDGPUTargetLowering;			class AMDGPUTargetLowering;

	class AMDGPUCallLowering: public CallLowering {			class AMDGPUCallLowering: public CallLowering {

				/// Builds a G_GEP that adds the constant \p Offset to the live-in
				/// kernarg segment pointer and returns the new generic virtual
				/// register, typed as a constant-address-space pointer to \p ParamTy.
				unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy,
				unsigned Offset) const;

				/// Builds a load of a \p ParamTy value into \p DstReg from the address
				/// produced by lowerParameterPtr for \p Offset.
				void lowerParameter(MachineIRBuilder &MIRBuilder, Type *ParamTy,
				unsigned Offset, unsigned DstReg) const;

	public:			public:
	AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);			AMDGPUCallLowering(const AMDGPUTargetLowering &TLI);

	bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,			bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
	unsigned VReg) const override;			unsigned VReg) const override;
	bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,			bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
	ArrayRef<unsigned> VRegs) const override;			ArrayRef<unsigned> VRegs) const override;
				/// Returns the calling-convention assignment function used when
				/// lowering formal arguments.
				CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
	};			};
	} // End of namespace llvm;			} // End of namespace llvm;
	#endif			#endif

llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

	//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//			//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
	//			//
	// The LLVM Compiler Infrastructure			// The LLVM Compiler Infrastructure
	//			//
	// This file is distributed under the University of Illinois Open Source			// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.			// License. See LICENSE.TXT for details.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	///			///
	/// \file			/// \file
	/// This file implements the lowering of LLVM calls to machine code calls for			/// This file implements the lowering of LLVM calls to machine code calls for
	/// GlobalISel.			/// GlobalISel.
	///			///
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "AMDGPUCallLowering.h"			#include "AMDGPUCallLowering.h"
				#include "AMDGPU.h"
	#include "AMDGPUISelLowering.h"			#include "AMDGPUISelLowering.h"
				#include "AMDGPUSubtarget.h"
				#include "SIISelLowering.h"
				#include "SIRegisterInfo.h"
				#include "SIMachineFunctionInfo.h"
				#include "llvm/CodeGen/CallingConvLower.h"
	#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"			#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
	#include "llvm/CodeGen/MachineInstrBuilder.h"			#include "llvm/CodeGen/MachineInstrBuilder.h"

	using namespace llvm;			using namespace llvm;

	#ifndef LLVM_BUILD_GLOBAL_ISEL			#ifndef LLVM_BUILD_GLOBAL_ISEL
	#error "This shouldn't be built without GISel"			#error "This shouldn't be built without GISel"
	#endif			#endif

	AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)			AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
	: CallLowering(&TLI) {			: CallLowering(&TLI) {
				// Only forwards the target lowering object to the CallLowering base;
				// this class has no state of its own to initialize.
	}			}

	bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,			bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
	const Value *Val, unsigned VReg) const {			const Value *Val, unsigned VReg) const {
				// Every return is lowered to S_ENDPGM; Val and VReg are not consulted.
				MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
	return true;			return true;
	}			}

				/// Materialize the address of a kernel argument.
				///
				/// Emits a 64-bit G_CONSTANT holding \p Offset and a G_GEP that adds it to the
				/// virtual register registered as live-in for the kernarg segment pointer.
				/// \returns the generic virtual register defined by the G_GEP; its type is a
				/// pointer to \p ParamTy in AMDGPUAS::CONSTANT_ADDRESS.
				unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
				                                               Type *ParamTy,
				                                               unsigned Offset) const {
				  MachineFunction &MF = MIRBuilder.getMF();
				  MachineRegisterInfo &MRI = MF.getRegInfo();
				  const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout();

				  // Destination register first, so virtual register numbering is stable.
				  PointerType *ParamPtrTy =
				      PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
				  unsigned DstReg = MRI.createGenericVirtualRegister(LLT(*ParamPtrTy, DL));

				  // Look up the virtual register paired with the preloaded kernarg pointer.
				  const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
				  unsigned BaseReg = MRI.getLiveInVirtReg(
				      TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR));

				  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
				  MIRBuilder.buildConstant(OffsetReg, Offset);
				  MIRBuilder.buildGEP(DstReg, BaseReg, OffsetReg);
				  return DstReg;
				}

				/// Load one kernel argument into \p DstReg.
				///
				/// The address comes from lowerParameterPtr(\p ParamTy, \p Offset). The memory
				/// operand uses the store size and ABI alignment of \p ParamTy and is flagged
				/// as a non-temporal, invariant load.
				void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
				                                        Type *ParamTy, unsigned Offset,
				                                        unsigned DstReg) const {
				  MachineFunction &MF = MIRBuilder.getMF();
				  const Function &F = *MF.getFunction();
				  const DataLayout &DL = F.getParent()->getDataLayout();
				  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
				  // An undef pointer value stands in for the argument's memory location;
				  // isInstrUniform treats such loads as uniform.
				  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
				  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
				  unsigned Align = DL.getABITypeAlignment(ParamTy);
				  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

				  MachineMemOperand *MMO =
				      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
				                                       MachineMemOperand::MONonTemporal |
				                                       MachineMemOperand::MOInvariant,
				                              TypeSize, Align);

				  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
				}

	bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,			bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
	const Function &F,			const Function &F,
	ArrayRef<unsigned> VRegs) const {			ArrayRef<unsigned> VRegs) const {
	// TODO: Implement once there are generic loads/stores.
				  // Lowers the formal arguments of F in three steps:
				  //  1. reserve the preloaded SGPR inputs the function info asks for,
				  //  2. run the calling-convention assignment over every IR argument,
				  //  3. emit one kernarg load per argument into the matching VRegs entry.
				  MachineFunction &MF = MIRBuilder.getMF();
				  const SISubtarget *Subtarget =
				      static_cast<const SISubtarget *>(&MF.getSubtarget());
				  MachineRegisterInfo &MRI = MF.getRegInfo();
				  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
				  const SIRegisterInfo *TRI = MF.getSubtarget<SISubtarget>().getRegisterInfo();
				  const DataLayout &DL = F.getParent()->getDataLayout();

				  SmallVector<CCValAssign, 16> ArgLocs;
				  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

				  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
				  if (Info->hasPrivateSegmentBuffer()) {
				    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
				    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
				    CCInfo.AllocateReg(PrivateSegmentBufferReg);
				  }

				  if (Info->hasDispatchPtr()) {
				    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
				    // FIXME: Need to add reg as live-in
				    CCInfo.AllocateReg(DispatchPtrReg);
				  }

				  if (Info->hasQueuePtr()) {
				    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
				    // FIXME: Need to add reg as live-in
				    CCInfo.AllocateReg(QueuePtrReg);
				  }

				  if (Info->hasKernargSegmentPtr()) {
				    // Copy the physical kernarg pointer into a generic virtual register and
				    // register the pair as live-in; lowerParameterPtr looks it up again via
				    // getLiveInVirtReg.
				    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
				    const LLT P2 = LLT::pointer(2, 64);
				    unsigned VReg = MRI.createGenericVirtualRegister(P2);
				    MRI.addLiveIn(InputPtrReg, VReg);
				    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
				    MIRBuilder.buildCopy(VReg, InputPtrReg);
				    CCInfo.AllocateReg(InputPtrReg);
				  }

				  if (Info->hasDispatchID()) {
				    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
				    // FIXME: Need to add reg as live-in
				    CCInfo.AllocateReg(DispatchIDReg);
				  }

				  if (Info->hasFlatScratchInit()) {
				    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
				    // FIXME: Need to add reg as live-in
				    CCInfo.AllocateReg(FlatScratchInitReg);
				  }

				  unsigned NumArgs = F.arg_size();
				  Function::const_arg_iterator CurOrigArg = F.arg_begin();
				  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
				  // The assignment function does not depend on the argument, so fetch it once.
				  CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
				                                           /*IsVarArg=*/false);
				  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
				    MVT ValVT = TLI.getValueType(DL, CurOrigArg->getType()).getSimpleVT();
				    ISD::ArgFlagsTy Flags;
				    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));
				    bool Res =
				        AssignFn(i, ValVT, ValVT, CCValAssign::Full, Flags, CCInfo);
				    assert(!Res && "Call operand has unhandled type");
				    (void)Res;
				  }

				  Function::const_arg_iterator Arg = F.arg_begin();
				  for (unsigned i = 0; i != NumArgs; ++i, ++Arg) {
				    // FIXME: We should be getting DebugInfo from the arguments some how.
				    CCValAssign &VA = ArgLocs[i];
				    // The assigned memory offset is relative to the explicit kernel argument
				    // area, so add that area's offset to get the kernarg segment offset.
				    lowerParameter(MIRBuilder, Arg->getType(),
				                   VA.getLocMemOffset() +
				                   Subtarget->getExplicitKernelArgOffset(MF), VRegs[i]);
				  }

	return true;			return true;
	}			}

llvm/trunk/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def

				//===- AMDGPUGenRegisterBankInfo.def -----------------------------*- C++ -*-==//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				/// \file
				/// This file defines all the static objects used by AMDGPURegisterBankInfo.
				/// \todo This should be generated by TableGen.
				//===----------------------------------------------------------------------===//

				#ifndef LLVM_BUILD_GLOBAL_ISEL
				#error "You shouldn't build this"
				#endif

				namespace llvm {
				namespace AMDGPU {

				// Symbolic indices into PartMappings; None marks "no mapping".
				enum PartialMappingIdx {
				  None = -1,
				  PM_SGPR32 = 0, // 32-bit value in the SGPR bank
				  PM_SGPR64 = 1, // 64-bit value in the SGPR bank
				  PM_VGPR32 = 2, // 32-bit value in the VGPR bank
				  PM_VGPR64 = 3  // 64-bit value in the VGPR bank
				};

				// One entry per PartialMappingIdx value, in the same order.
				// Fields: StartIdx, Length (bits), RegBank.
				const RegisterBankInfo::PartialMapping PartMappings[] = {
				  {0, 32, SGPRRegBank}, // PM_SGPR32
				  {0, 64, SGPRRegBank}, // PM_SGPR64
				  {0, 32, VGPRRegBank}, // PM_VGPR32
				  {0, 64, VGPRRegBank}  // PM_VGPR64
				};

				// Value mappings, each made of exactly one partial mapping. The order
				// (SGPR 32/64, then VGPR 32/64) is what getValueMapping indexes into.
				const RegisterBankInfo::ValueMapping ValMappings[] = {
				  {&PartMappings[PM_SGPR32], 1}, // SGPR 32-bit
				  {&PartMappings[PM_SGPR64], 1}, // SGPR 64-bit
				  {&PartMappings[PM_VGPR32], 1}, // VGPR 32-bit
				  {&PartMappings[PM_VGPR64], 1}  // VGPR 64-bit
				};

				// Index of the first ValMappings entry belonging to each register bank.
				enum ValueMappingIdx {
				  SGPRStartIdx = 0, // SGPR entries start here
				  VGPRStartIdx = 2  // VGPR entries start here
				};

				/// Return the ValMappings entry for a \p Size -bit value in bank \p BankID.
				///
				/// AMDGPU::SGPRRegBankID selects the SGPR entries; any other bank ID selects
				/// the VGPR entries. \p Size must be 32 or 64, the only sizes in the table.
				const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID,
				                                                      unsigned Size) {
				  // Size 0 would wrap the index and sizes above 64 would run into the next
				  // bank's entries or past the end of the table, so reject them outright.
				  assert((Size == 32 || Size == 64) && "Unsupported value mapping size");
				  unsigned Idx = BankID == AMDGPU::SGPRRegBankID ? SGPRStartIdx : VGPRStartIdx;
				  Idx += (Size / 32) - 1;
				  return &ValMappings[Idx];
				}

				} // End AMDGPU namespace.
				} // End llvm namespace.

llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Show All 9 Lines
/// \file		/// \file
/// \brief This is the parent TargetLowering class for hardware code gen		/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.		/// targets.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"		#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"		#include "AMDGPU.h"
		#include "AMDGPUCallLowering.h"
#include "AMDGPUFrameLowering.h"		#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"		#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"		#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"		#include "AMDGPUSubtarget.h"
#include "R600MachineFunctionInfo.h"		#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"		#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"		#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"		#include "llvm/CodeGen/MachineFunction.h"
▲ Show 20 Lines • Show All 639 Lines • ▼ Show 20 Lines	bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// not profitable, and may actually be harmful.		// not profitable, and may actually be harmful.
return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;		return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
}		}

//===---------------------------------------------------------------------===//		//===---------------------------------------------------------------------===//
// TargetLowering Callbacks		// TargetLowering Callbacks
//===---------------------------------------------------------------------===//		//===---------------------------------------------------------------------===//

		/// Returns the argument assignment function for GlobalISel call lowering.
		/// Both \p CC and \p IsVarArg are currently ignored: CC_AMDGPU is returned
		/// for every calling convention.
		CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
		bool IsVarArg) const {
		return CC_AMDGPU;
		}

/// The SelectionDAGBuilder will automatically promote function arguments		/// The SelectionDAGBuilder will automatically promote function arguments
/// with illegal types. However, this does not work for the AMDGPU targets		/// with illegal types. However, this does not work for the AMDGPU targets
/// since the function arguments are stored in memory as these illegal types.		/// since the function arguments are stored in memory as these illegal types.
/// In order to handle this properly we need to get the original types sizes		/// In order to handle this properly we need to get the original types sizes
/// from the LLVM IR Function and fixup the ISD:InputArg values before		/// from the LLVM IR Function and fixup the ISD:InputArg values before
/// passing them to AnalyzeFormalArguments()		/// passing them to AnalyzeFormalArguments()

/// When the SelectionDAGBuilder computes the Ins, it takes care of splitting		/// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
▲ Show 20 Lines • Show All 2,736 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

				//===- AMDGPUInstructionSelector --------------------------------*- C++ -*-==//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				/// \file
				/// This file declares the targeting of the InstructionSelector class for
				/// AMDGPU.
				//===----------------------------------------------------------------------===//

				#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
				#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H

				#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
				#include "llvm/ADT/ArrayRef.h"
				#include "llvm/ADT/SmallVector.h"

				namespace llvm {

				class AMDGPUInstrInfo;
				class AMDGPURegisterBankInfo;
				class MachineInstr;
				class MachineOperand;
				class MachineRegisterInfo;
				class SIInstrInfo;
				class SIRegisterInfo;
				class SISubtarget;

				/// Hand-written GlobalISel instruction selector for AMDGPU.
				class AMDGPUInstructionSelector : public InstructionSelector {
				public:
				AMDGPUInstructionSelector(const SISubtarget &STI,
				const AMDGPURegisterBankInfo &RBI);

				/// Selects \p I. Non-generic opcodes are reported as already selected
				/// (true); generic opcodes without a handler return false.
				bool select(MachineInstr &I) const override;

				private:
				/// Operands of one G_GEP in a load's address chain, split by register
				/// bank, plus the constant operand (if any) as an immediate.
				struct GEPInfo {
				const MachineInstr &GEP;
				SmallVector<unsigned, 2> SgprParts;
				SmallVector<unsigned, 2> VgprParts;
				int64_t Imm;
				GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
				};

				/// Returns an operand for the 32-bit half \p SubIdx of the 64-bit
				/// operand \p MO: a COPY of the sub-register for register operands, or
				/// the low/high 32 bits for immediates.
				MachineOperand getSubOperand64(MachineOperand &MO, unsigned SubIdx) const;
				bool selectG_CONSTANT(MachineInstr &I) const;
				/// Only 64-bit adds are handled; other sizes return false.
				bool selectG_ADD(MachineInstr &I) const;
				/// Forwards to selectG_ADD.
				bool selectG_GEP(MachineInstr &I) const;
				/// True if any entry of \p AddrInfo recorded a VGPR-bank operand.
				bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
				/// Walks the chain of G_GEPs feeding operand 1 of \p Load and appends
				/// one GEPInfo per G_GEP to \p AddrInfo, nearest first.
				void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
				SmallVectorImpl<GEPInfo> &AddrInfo) const;
				/// Tries to select \p I as a scalar (S_LOAD_*) load; returns false
				/// without emitting anything when its preconditions do not hold.
				bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
				bool selectG_LOAD(MachineInstr &I) const;
				bool selectG_STORE(MachineInstr &I) const;

				const SIInstrInfo &TII;
				const SIRegisterInfo &TRI;
				const AMDGPURegisterBankInfo &RBI;
				};

				} // End llvm namespace.
				#endif

llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

				//===- AMDGPUInstructionSelector.cpp ----------------------------*- C++ -*-==//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				/// \file
				/// This file implements the targeting of the InstructionSelector class for
				/// AMDGPU.
				/// \todo This should be generated by TableGen.
				//===----------------------------------------------------------------------===//

				#include "AMDGPUInstructionSelector.h"
				#include "AMDGPUInstrInfo.h"
				#include "AMDGPURegisterBankInfo.h"
				#include "AMDGPURegisterInfo.h"
				#include "AMDGPUSubtarget.h"
				#include "llvm/CodeGen/MachineBasicBlock.h"
				#include "llvm/CodeGen/MachineFunction.h"
				#include "llvm/CodeGen/MachineInstr.h"
				#include "llvm/CodeGen/MachineInstrBuilder.h"
				#include "llvm/CodeGen/MachineRegisterInfo.h"
				#include "llvm/IR/Type.h"
				#include "llvm/Support/Debug.h"
				#include "llvm/Support/raw_ostream.h"

				#define DEBUG_TYPE "amdgpu-isel"

				using namespace llvm;

				// Caches the instruction and register info from the subtarget and keeps a
				// reference to the register bank info; all three are stored by reference.
				AMDGPUInstructionSelector::AMDGPUInstructionSelector(
				const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI)
				: InstructionSelector(), TII(*STI.getInstrInfo()),
				TRI(*STI.getRegisterInfo()), RBI(RBI) {}

				/// Produce an operand for one 32-bit half of the 64-bit operand \p MO.
				///
				/// Register operands: a COPY of the sub-register (\p SubIdx composed with
				/// MO's own sub-register index) is inserted before MO's instruction and the
				/// copy's destination is returned with MO's flags. Immediate operands: the
				/// low (sub0) or high (sub1) 32 bits are returned as an immediate.
				MachineOperand
				AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
				                                           unsigned SubIdx) const {
				  MachineInstr *UserMI = MO.getParent();
				  MachineBasicBlock *MBB = UserMI->getParent();
				  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
				  // Created unconditionally, matching the existing virtual register numbering.
				  unsigned HalfReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);

				  if (!MO.isReg()) {
				    assert(MO.isImm());

				    APInt Imm(64, MO.getImm());
				    if (SubIdx == AMDGPU::sub0)
				      return MachineOperand::CreateImm(Imm.getLoBits(32).getSExtValue());
				    if (SubIdx == AMDGPU::sub1)
				      return MachineOperand::CreateImm(Imm.getHiBits(32).getSExtValue());
				    llvm_unreachable("do not know to split immediate with this sub index.");
				  }

				  unsigned SubRegIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
				  BuildMI(*MBB, UserMI, UserMI->getDebugLoc(), TII.get(AMDGPU::COPY), HalfReg)
				      .addReg(MO.getReg(), 0, SubRegIdx);

				  return MachineOperand::CreateReg(HalfReg, MO.isDef(), MO.isImplicit(),
				                                   MO.isKill(), MO.isDead(), MO.isUndef(),
				                                   MO.isEarlyClobber(), 0, MO.isDebug(),
				                                   MO.isInternalRead());
				}

				/// Select a 64-bit scalar add.
				///
				/// The add is split into S_ADD_U32 (low halves) and S_ADDC_U32 (high halves)
				/// whose results are recombined with REG_SEQUENCE into the original
				/// destination. \returns false, emitting nothing, for any other size.
				bool AMDGPUInstructionSelector::selectG_ADD(MachineInstr &I) const {
				  MachineBasicBlock *BB = I.getParent();
				  MachineFunction *MF = BB->getParent();
				  MachineRegisterInfo &MRI = MF->getRegInfo();
				  unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);

				  // Reject unsupported sizes before creating any virtual registers, so a
				  // failed selection does not leave unused registers behind.
				  if (Size != 64)
				    return false;

				  unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
				  unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

				  DebugLoc DL = I.getDebugLoc();

				  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
				          .add(getSubOperand64(I.getOperand(1), AMDGPU::sub0))
				          .add(getSubOperand64(I.getOperand(2), AMDGPU::sub0));

				  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
				          .add(getSubOperand64(I.getOperand(1), AMDGPU::sub1))
				          .add(getSubOperand64(I.getOperand(2), AMDGPU::sub1));

				  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
				          .addReg(DstLo)
				          .addImm(AMDGPU::sub0)
				          .addReg(DstHi)
				          .addImm(AMDGPU::sub1);

				  // Constrain every virtual register operand of the original instruction to
				  // a 64-bit SGPR class.
				  for (MachineOperand &MO : I.explicit_operands()) {
				    if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
				      continue;
				    RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
				  }

				  I.eraseFromParent();
				  return true;
				}

				// G_GEP is selected exactly like G_ADD, so the same 64-bit-only restriction
				// applies.
				bool AMDGPUInstructionSelector::selectG_GEP(MachineInstr &I) const {
				return selectG_ADD(I);
				}

				/// Select a G_STORE as FLAT_STORE_DWORD.
				///
				/// The new instruction takes the G_STORE's operand 1 followed by its operand
				/// 0 and three zero immediates, then replaces the generic instruction.
				/// \returns whether the new instruction's register operands could be
				/// constrained.
				bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
				  MachineBasicBlock *BB = I.getParent();
				  DebugLoc DL = I.getDebugLoc();

				  // FIXME: Select store instruction based on address space
				  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(AMDGPU::FLAT_STORE_DWORD))
				          .add(I.getOperand(1))
				          .add(I.getOperand(0))
				          .addImm(0)
				          .addImm(0)
				          .addImm(0);

				  // Now that we selected an opcode, we need to constrain the register
				  // operands to use appropriate classes.
				  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);

				  I.eraseFromParent();
				  return Ret;
				}

				/// Select a G_CONSTANT.
				///
				/// A 32-bit constant is rewritten in place to S_MOV_B32. A 64-bit constant
				/// becomes two S_MOV_B32 instructions (low and high halves) joined by a
				/// REG_SEQUENCE, and the generic instruction is erased.
				bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
				  MachineBasicBlock *MBB = I.getParent();
				  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
				  unsigned ResultReg = I.getOperand(0).getReg();
				  unsigned Size = RBI.getSizeInBits(ResultReg, MRI, TRI);

				  if (Size == 32) {
				    I.setDesc(TII.get(AMDGPU::S_MOV_B32));
				    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
				  }

				  assert(Size == 64);

				  DebugLoc DL = I.getDebugLoc();
				  unsigned LoReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
				  unsigned HiReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

				  // Split the immediate: truncated low half, arithmetically shifted high half.
				  const APInt &Imm = I.getOperand(1).getCImm()->getValue();
				  const uint64_t LoBits = Imm.trunc(32).getZExtValue();
				  const uint64_t HiBits = Imm.ashr(32).getZExtValue();

				  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B32), LoReg).addImm(LoBits);
				  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg).addImm(HiBits);

				  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), ResultReg)
				      .addReg(LoReg)
				      .addImm(AMDGPU::sub0)
				      .addReg(HiReg)
				      .addImm(AMDGPU::sub1);

				  // We can't call constrainSelectedInstRegOperands here, because it doesn't
				  // work for target independent opcodes
				  I.eraseFromParent();
				  return RBI.constrainGenericRegister(ResultReg, AMDGPU::SReg_64RegClass, MRI);
				}

				/// True if \p MI is a generic integer constant (G_CONSTANT).
				static bool isConstant(const MachineInstr &MI) {
				  const unsigned Opc = MI.getOpcode();
				  return Opc == TargetOpcode::G_CONSTANT;
				}

				void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
				const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {

				const MachineInstr *PtrMI = MRI.getUniqueVRegDef(Load.getOperand(1).getReg());

				assert(PtrMI);

				if (PtrMI->getOpcode() != TargetOpcode::G_GEP)
				return;

				GEPInfo GEPInfo(*PtrMI);

				for (unsigned i = 1, e = 3; i < e; ++i) {
				const MachineOperand &GEPOp = PtrMI->getOperand(i);
				const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
				assert(OpDef);
				if (isConstant(*OpDef)) {
				// FIXME: Is it possible to have multiple Imm parts? Maybe if we
				// are lacking other optimizations.
				assert(GEPInfo.Imm == 0);
				GEPInfo.Imm = OpDef->getOperand(1).getCImm()->getSExtValue();
				continue;
				}
				const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
				if (OpBank->getID() == AMDGPU::SGPRRegBankID)
				GEPInfo.SgprParts.push_back(GEPOp.getReg());
				else
				GEPInfo.VgprParts.push_back(GEPOp.getReg());
				}

				AddrInfo.push_back(GEPInfo);
				getAddrModeInfo(*PtrMI, MRI, AddrInfo);
				}

				/// Decide from the memory operand of \p MI whether the access is uniform.
				///
				/// \returns false unless \p MI has exactly one memory operand. With one, the
				/// access is uniform when the operand's IR value is absent, an undef, an
				/// argument, a constant or a global value, or an instruction carrying
				/// "amdgpu.uniform" metadata.
				static bool isInstrUniform(const MachineInstr &MI) {
				  if (!MI.hasOneMemOperand())
				    return false;

				  const MachineMemOperand *MMO = *MI.memoperands_begin();
				  const Value *Ptr = MMO->getValue();

				  // UndefValue means this is a load of a kernel input. These are uniform.
				  // Sometimes LDS instructions have constant pointers.
				  // If Ptr is null, then that means this mem operand contains a
				  // PseudoSourceValue like GOT.
				  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
				      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
				    return true;

				  const Instruction *I = dyn_cast<Instruction>(Ptr);
				  return I && I->getMetadata("amdgpu.uniform");
				}

				/// Map a DWORD-sized scalar load opcode to the variant for \p LoadSize bits.
				///
				/// \p BaseOpcode is returned unchanged for 32-bit loads. For 64/128/256/512
				/// bits the X2/X4/X8/X16 variant of the same addressing form (IMM, IMM_ci or
				/// SGPR) is returned. Any other base opcode or size is unreachable.
				static unsigned getSmrdOpcode(unsigned BaseOpcode, unsigned LoadSize) {
				  if (LoadSize == 32)
				    return BaseOpcode;

				  // Picks the opcode matching LoadSize out of one addressing form's family.
				  auto PickBySize = [LoadSize](unsigned X2, unsigned X4, unsigned X8,
				                               unsigned X16) -> unsigned {
				    switch (LoadSize) {
				    case 64:
				      return X2;
				    case 128:
				      return X4;
				    case 256:
				      return X8;
				    case 512:
				      return X16;
				    }
				    llvm_unreachable("Invalid base smrd opcode or size");
				  };

				  switch (BaseOpcode) {
				  case AMDGPU::S_LOAD_DWORD_IMM:
				    return PickBySize(AMDGPU::S_LOAD_DWORDX2_IMM, AMDGPU::S_LOAD_DWORDX4_IMM,
				                      AMDGPU::S_LOAD_DWORDX8_IMM, AMDGPU::S_LOAD_DWORDX16_IMM);
				  case AMDGPU::S_LOAD_DWORD_IMM_ci:
				    return PickBySize(AMDGPU::S_LOAD_DWORDX2_IMM_ci,
				                      AMDGPU::S_LOAD_DWORDX4_IMM_ci,
				                      AMDGPU::S_LOAD_DWORDX8_IMM_ci,
				                      AMDGPU::S_LOAD_DWORDX16_IMM_ci);
				  case AMDGPU::S_LOAD_DWORD_SGPR:
				    return PickBySize(AMDGPU::S_LOAD_DWORDX2_SGPR, AMDGPU::S_LOAD_DWORDX4_SGPR,
				                      AMDGPU::S_LOAD_DWORDX8_SGPR,
				                      AMDGPU::S_LOAD_DWORDX16_SGPR);
				  }
				  llvm_unreachable("Invalid base smrd opcode or size");
				}

				/// True if at least one entry of \p AddrInfo recorded a VGPR-bank operand.
				bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
				  for (const GEPInfo &Entry : AddrInfo) {
				    const bool UsesVgpr = !Entry.VgprParts.empty();
				    if (UsesVgpr)
				      return true;
				  }
				  return false;
				}

				/// Try to select the load \p I as a scalar memory read (S_LOAD_*).
				///
				/// Requires exactly one memory operand in the constant address space, a
				/// uniform access, and no VGPR operands anywhere in \p AddrInfo; otherwise
				/// returns false without emitting anything. When the nearest G_GEP has a
				/// single SGPR operand, its immediate is folded as an encoded immediate, a
				/// 32-bit literal (Sea Islands only) or an SGPR offset, in that order of
				/// preference. Otherwise the load uses operand 1 of \p I with a zero offset.
				/// \p I itself is not erased here.
				bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I,
				                                           ArrayRef<GEPInfo> AddrInfo) const {

				  if (!I.hasOneMemOperand())
				    return false;

				  if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS)
				    return false;

				  if (!isInstrUniform(I))
				    return false;

				  if (hasVgprParts(AddrInfo))
				    return false;

				  MachineBasicBlock *BB = I.getParent();
				  MachineFunction *MF = BB->getParent();
				  const SISubtarget &Subtarget = MF->getSubtarget<SISubtarget>();
				  MachineRegisterInfo &MRI = MF->getRegInfo();
				  unsigned DstReg = I.getOperand(0).getReg();
				  const DebugLoc &DL = I.getDebugLoc();
				  unsigned Opcode;
				  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);

				  if (!AddrInfo.empty() && AddrInfo[0].SgprParts.size() == 1) {

				    const GEPInfo &GEPInfo = AddrInfo[0];

				    unsigned PtrReg = GEPInfo.SgprParts[0];
				    int64_t EncodedImm = AMDGPU::getSMRDEncodedOffset(Subtarget, GEPInfo.Imm);
				    if (AMDGPU::isLegalSMRDImmOffset(Subtarget, GEPInfo.Imm)) {
				      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);

				      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
				                                 .addReg(PtrReg)
				                                 .addImm(EncodedImm)
				                                 .addImm(0); // glc
				      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
				    }

				    if (Subtarget.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS &&
				        isUInt<32>(EncodedImm)) {
				      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM_ci, LoadSize);
				      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
				                                 .addReg(PtrReg)
				                                 .addImm(EncodedImm)
				                                 .addImm(0); // glc
				      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
				    }

				    if (isUInt<32>(GEPInfo.Imm)) {
				      // Materialize the unencoded offset in an SGPR and use the register form.
				      Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_SGPR, LoadSize);
				      unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
				      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), OffsetReg)
				              .addImm(GEPInfo.Imm);

				      MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
				                                 .addReg(PtrReg)
				                                 .addReg(OffsetReg)
				                                 .addImm(0); // glc
				      return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
				    }
				  }

				  // Nothing could be folded: load from the original pointer at offset zero.
				  unsigned PtrReg = I.getOperand(1).getReg();
				  Opcode = getSmrdOpcode(AMDGPU::S_LOAD_DWORD_IMM, LoadSize);
				  MachineInstr *SMRD = BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg)
				                               .addReg(PtrReg)
				                               .addImm(0)
				                               .addImm(0); // glc
				  return constrainSelectedInstRegOperands(*SMRD, TII, TRI, RBI);
				}


				/// Select a G_LOAD.
				///
				/// A scalar load is tried first via selectSMRD. If that declines, a
				/// FLAT_LOAD_DWORD (32-bit) or FLAT_LOAD_DWORDX2 (64-bit) is emitted; other
				/// sizes are unreachable. The generic instruction is erased in both cases.
				bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const {
				  MachineBasicBlock *BB = I.getParent();
				  MachineFunction *MF = BB->getParent();
				  MachineRegisterInfo &MRI = MF->getRegInfo();
				  DebugLoc DL = I.getDebugLoc();
				  unsigned DstReg = I.getOperand(0).getReg();
				  unsigned PtrReg = I.getOperand(1).getReg();
				  unsigned LoadSize = RBI.getSizeInBits(DstReg, MRI, TRI);
				  unsigned Opcode;

				  SmallVector<GEPInfo, 4> AddrInfo;

				  getAddrModeInfo(I, MRI, AddrInfo);

				  if (selectSMRD(I, AddrInfo)) {
				    I.eraseFromParent();
				    return true;
				  }

				  switch (LoadSize) {
				  default:
				    llvm_unreachable("Load size not supported\n");
				  case 32:
				    Opcode = AMDGPU::FLAT_LOAD_DWORD;
				    break;
				  case 64:
				    Opcode = AMDGPU::FLAT_LOAD_DWORDX2;
				    break;
				  }

				  MachineInstr *Flat = BuildMI(*BB, &I, DL, TII.get(Opcode))
				                               .add(I.getOperand(0))
				                               .addReg(PtrReg)
				                               .addImm(0)
				                               .addImm(0)
				                               .addImm(0);

				  bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI);
				  I.eraseFromParent();
				  return Ret;
				}

				/// Entry point of the selector: dispatch \p I to the handler for its opcode.
				///
				/// \returns true for opcodes that are not pre-isel generic (nothing to do),
				/// the handler's result for supported generic opcodes, and false for every
				/// other generic opcode.
				bool AMDGPUInstructionSelector::select(MachineInstr &I) const {
				  const unsigned Opc = I.getOpcode();

				  if (!isPreISelGenericOpcode(Opc))
				    return true;

				  switch (Opc) {
				  case TargetOpcode::G_ADD:
				    return selectG_ADD(I);
				  case TargetOpcode::G_CONSTANT:
				    return selectG_CONSTANT(I);
				  case TargetOpcode::G_GEP:
				    return selectG_GEP(I);
				  case TargetOpcode::G_LOAD:
				    return selectG_LOAD(I);
				  case TargetOpcode::G_STORE:
				    return selectG_STORE(I);
				  default:
				    return false;
				  }
				}

llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

				//===- AMDGPULegalizerInfo ---------------------------------------*- C++ -*-==//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				/// \file
				/// This file declares the targeting of the Machinelegalizer class for
				/// AMDGPU.
				/// \todo This should be generated by TableGen.
				//===----------------------------------------------------------------------===//

				#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
				#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H

				#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"

				namespace llvm {

				class LLVMContext;

				/// This class provides the legalization rules for the AMDGPU target; the
				/// constructor registers which generic opcode/type combinations are legal.
				class AMDGPULegalizerInfo : public LegalizerInfo {
				public:
				AMDGPULegalizerInfo();
				};
				} // End llvm namespace.
				#endif

llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

				//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				/// \file
				/// This file implements the targeting of the Machinelegalizer class for
				/// AMDGPU.
				/// \todo This should be generated by TableGen.
				//===----------------------------------------------------------------------===//

				#include "AMDGPULegalizerInfo.h"
				#include "llvm/CodeGen/ValueTypes.h"
				#include "llvm/IR/Type.h"
				#include "llvm/IR/DerivedTypes.h"
				#include "llvm/Target/TargetOpcodes.h"
				#include "llvm/Support/Debug.h"

				using namespace llvm;

				#ifndef LLVM_BUILD_GLOBAL_ISEL
				#error "You shouldn't build this"
				#endif

				/// Marks the small set of generic opcode/type pairs that are currently
				/// supported as Legal, then builds the legalizer tables.
				AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
				  using namespace TargetOpcode;

				  // Types used below: 32/64-bit scalars and 64-bit pointers in address
				  // spaces 1 and 2.
				  const LLT Scalar32 = LLT::scalar(32);
				  const LLT Scalar64 = LLT::scalar(64);
				  const LLT PtrAS1 = LLT::pointer(1, 64);
				  const LLT PtrAS2 = LLT::pointer(2, 64);

				  setAction({G_CONSTANT, Scalar64}, Legal);

				  // G_GEP: pointer result (type index 0), 64-bit scalar offset (index 1).
				  for (const LLT &PtrTy : {PtrAS1, PtrAS2})
				    setAction({G_GEP, PtrTy}, Legal);
				  setAction({G_GEP, 1, Scalar64}, Legal);

				  // G_LOAD: loaded value (type index 0) and address (type index 1).
				  for (const LLT &PtrTy : {PtrAS1, PtrAS2})
				    setAction({G_LOAD, PtrTy}, Legal);
				  setAction({G_LOAD, Scalar32}, Legal);
				  for (const LLT &PtrTy : {PtrAS1, PtrAS2})
				    setAction({G_LOAD, 1, PtrTy}, Legal);

				  // G_STORE: stored value (type index 0) and address (type index 1).
				  setAction({G_STORE, Scalar32}, Legal);
				  setAction({G_STORE, 1, PtrAS1}, Legal);

				  // FIXME: When RegBankSelect inserts copies, it will only create new
				  // registers with scalar types. This means we can end up with
				  // G_LOAD/G_STORE/G_GEP instruction with scalar types for their pointer
				  // operands. In assert builds, the instruction selector will assert
				  // if it sees a generic instruction which isn't legal, so we need to
				  // tell it that scalar types are legal for pointer operands
				  setAction({G_GEP, Scalar64}, Legal);
				  setAction({G_LOAD, 1, Scalar64}, Legal);
				  setAction({G_STORE, 1, Scalar64}, Legal);

				  computeTables();
				}

llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h

				//===- AMDGPURegisterBankInfo ------------------------------------ C++ --==//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				/// \file
				/// This file declares the targeting of the RegisterBankInfo class for AMDGPU.
				/// \todo This should be generated by TableGen.
				//===----------------------------------------------------------------------===//

				#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H
				#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGISTERBANKINFO_H

				#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"

				namespace llvm {

				class SIRegisterInfo;
				class TargetRegisterInfo;

				namespace AMDGPU {
				// Numeric identifiers of the AMDGPU register banks; these are the IDs
				// passed to getRegBank(). NumRegisterBanks follows the last bank ID.
				enum {
				SGPRRegBankID = 0,
				VGPRRegBankID = 1,
				NumRegisterBanks
				};
				} // End AMDGPU namespace.

				/// This class provides the information for the target register banks.
				/// Its members come from the TableGen-generated AMDGPUGenRegisterBank.inc.
				class AMDGPUGenRegisterBankInfo : public RegisterBankInfo {

				protected:

				// Selects the class-member section of the generated include.
				#define GET_TARGET_REGBANK_CLASS
				#include "AMDGPUGenRegisterBank.inc"

				};
				/// Hand-written register bank information for AMDGPU, built on top of the
				/// generated AMDGPUGenRegisterBankInfo base.
				class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
				// Register info used to classify register classes and to query register
				// sizes when building mappings.
				const SIRegisterInfo *TRI;

				/// See RegisterBankInfo::applyMapping.
				void applyMappingImpl(const OperandsMapper &OpdMapper) const override;

				/// Builds the mapping for a load: both operands go to the SGPR bank when
				/// the instruction is uniform, otherwise both go to the VGPR bank.
				RegisterBankInfo::InstructionMapping
				getInstrMappingForLoad(const MachineInstr &MI) const;

				public:
				/// \p TRI is downcast to SIRegisterInfo and stored for later queries.
				AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);

				/// Currently forwards to RegisterBankInfo::copyCost.
				unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
				unsigned Size) const override;

				/// Returns the SGPR bank for SGPR register classes and the VGPR bank for
				/// everything else.
				const RegisterBank &
				getRegBankFromRegClass(const TargetRegisterClass &RC) const override;

				/// Returns target-specific alternative mappings for G_LOAD and defers to
				/// the base class for all other opcodes.
				InstructionMappings
				getInstrAlternativeMappings(const MachineInstr &MI) const override;

				/// Returns the default mapping for \p MI, with target-specific handling
				/// for G_CONSTANT, G_GEP, G_STORE and G_LOAD.
				InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
				};
				} // End llvm namespace.
				#endif

llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

				//===- AMDGPURegisterBankInfo.cpp -------------------------------- C++ --==//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				/// \file
				/// This file implements the targeting of the RegisterBankInfo class for
				/// AMDGPU.
				/// \todo This should be generated by TableGen.
				//===----------------------------------------------------------------------===//

				#include "AMDGPURegisterBankInfo.h"
				#include "AMDGPUInstrInfo.h"
				#include "SIRegisterInfo.h"
				#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
				#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
				#include "llvm/IR/Constants.h"
				#include "llvm/Target/TargetRegisterInfo.h"
				#include "llvm/Target/TargetSubtargetInfo.h"

				#define GET_TARGET_REGBANK_IMPL
				#include "AMDGPUGenRegisterBank.inc"

				// This file will be TableGen'ed at some point.
				#include "AMDGPUGenRegisterBankInfo.def"

				using namespace llvm;

				#ifndef LLVM_BUILD_GLOBAL_ISEL
				#error "You shouldn't build this"
				#endif

				/// Stores \p TRI (downcast to SIRegisterInfo) and, on first construction
				/// only, sanity-checks that the bank IDs resolve to the generated
				/// SGPR/VGPR register bank objects.
				AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
				    : AMDGPUGenRegisterBankInfo(),
				      TRI(static_cast<const SIRegisterInfo*>(&TRI)) {

				  // HACK: Until this is fully tablegen'd
				  static bool AlreadyInit = false;
				  if (AlreadyInit)
				    return;

				  AlreadyInit = true;

				  // The locals below are only read inside assert(); the (void) casts keep
				  // builds with assertions disabled free of unused-variable warnings.
				  const RegisterBank &RBSGPR = getRegBank(AMDGPU::SGPRRegBankID);
				  (void)RBSGPR;
				  assert(&RBSGPR == &AMDGPU::SGPRRegBank);

				  const RegisterBank &RBVGPR = getRegBank(AMDGPU::VGPRRegBankID);
				  (void)RBVGPR;
				  assert(&RBVGPR == &AMDGPU::VGPRRegBank);

				}

				/// Cost of a \p Size-bit copy between banks \p A and \p B. No
				/// target-specific costs yet: this simply uses the generic implementation.
				unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &A,
				                                          const RegisterBank &B,
				                                          unsigned Size) const {
				  const unsigned GenericCost = RegisterBankInfo::copyCost(A, B, Size);
				  return GenericCost;
				}

				/// Maps register class \p RC to its bank: classes that
				/// SIRegisterInfo::isSGPRClass accepts go to the SGPR bank, all others to
				/// the VGPR bank.
				const RegisterBank &AMDGPURegisterBankInfo::getRegBankFromRegClass(
				    const TargetRegisterClass &RC) const {
				  const bool IsSGPRClass = TRI->isSGPRClass(&RC);
				  return getRegBank(IsSGPRClass ? AMDGPU::SGPRRegBankID
				                                : AMDGPU::VGPRRegBankID);
				}

				/// Returns the alternative operand-to-bank mappings for \p MI.
				///
				/// Only G_LOAD has target-specific alternatives (SGPR value/SGPR pointer,
				/// VGPR value/VGPR pointer, and VGPR value/SGPR pointer). Every other
				/// opcode defers to RegisterBankInfo::getInstrAlternativeMappings.
				RegisterBankInfo::InstructionMappings
				AMDGPURegisterBankInfo::getInstrAlternativeMappings(
				    const MachineInstr &MI) const {

				  InstructionMappings AltMappings;
				  switch (MI.getOpcode()) {
				  case TargetOpcode::G_LOAD: {
				    // Query the size of the loaded value here rather than before the
				    // switch: for other opcodes operand 0 is not guaranteed to be a
				    // register, and getReg() asserts on non-register operands.
				    const MachineFunction &MF = *MI.getParent()->getParent();
				    const MachineRegisterInfo &MRI = MF.getRegInfo();
				    unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);

				    // FIXME: Should we be hard coding the size for these mappings?
				    InstructionMapping SSMapping(1, 1,
				      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
				                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
				      2); // Num Operands
				    AltMappings.emplace_back(std::move(SSMapping));

				    InstructionMapping VVMapping(2, 1,
				      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
				                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
				      2); // Num Operands
				    AltMappings.emplace_back(std::move(VVMapping));

				    // FIXME: Should this be the pointer-size (64-bits) or the size of the
				    // register that will hold the buffer resource (128-bits).
				    InstructionMapping VSMapping(3, 1,
				      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
				                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
				      2); // Num Operands
				    AltMappings.emplace_back(std::move(VSMapping));

				    return AltMappings;

				  }
				  default:
				    break;
				  }
				  return RegisterBankInfo::getInstrAlternativeMappings(MI);
				}

				/// Applies \p OpdMapper using the default mapping logic; there is no
				/// target-specific rewriting yet.
				void AMDGPURegisterBankInfo::applyMappingImpl(
				    const OperandsMapper &OpdMapper) const {
				  applyDefaultMapping(OpdMapper);
				}

				/// Returns true if \p MI has exactly one memory operand and
				/// AMDGPU::isUniformMMO reports that operand as uniform.
				static bool isInstrUniform(const MachineInstr &MI) {
				  if (!MI.hasOneMemOperand())
				    return false;

				  // memoperands_begin() is an iterator over MachineMemOperand pointers;
				  // dereference it to obtain the single memory operand.
				  const MachineMemOperand *MMO = *MI.memoperands_begin();
				  return AMDGPU::isUniformMMO(MMO);
				}

				/// Builds the default mapping for a load instruction \p MI.
				///
				/// Operand 0 (the loaded value) and operand 1 (the pointer) are both mapped
				/// to the SGPR bank when the instruction is uniform, and both to the VGPR
				/// bank otherwise.
				RegisterBankInfo::InstructionMapping
				AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
				  const MachineRegisterInfo &RegInfo =
				      MI.getParent()->getParent()->getRegInfo();
				  const unsigned NumOpds = MI.getNumOperands();

				  RegisterBankInfo::InstructionMapping LoadMapping =
				      InstructionMapping{1, 1, nullptr, NumOpds};
				  SmallVector<const ValueMapping*, 8> BankPerOperand(NumOpds);

				  const unsigned ValueBits =
				      getSizeInBits(MI.getOperand(0).getReg(), RegInfo, *TRI);
				  const unsigned PointerBits =
				      getSizeInBits(MI.getOperand(1).getReg(), RegInfo, *TRI);

				  // We have a uniform instruction so we want to use an SMRD load (SGPRs);
				  // otherwise everything goes to VGPRs.
				  // FIXME: What would happen if we used SGPRRegBankID for the pointer of a
				  // non-uniform load?
				  const unsigned BankID = isInstrUniform(MI) ? AMDGPU::SGPRRegBankID
				                                             : AMDGPU::VGPRRegBankID;

				  BankPerOperand[0] = AMDGPU::getValueMapping(BankID, ValueBits);
				  BankPerOperand[1] = AMDGPU::getValueMapping(BankID, PointerBits);

				  // FIXME: Do we want to add a mapping for FLAT load, or should we just
				  // handle that during instruction selection?
				  LoadMapping.setOperandsMapping(getOperandsMapping(BankPerOperand));
				  return LoadMapping;
				}

				/// Returns the default register bank mapping for \p MI.
				///
				/// The generic getInstrMappingImpl result is used when it is valid.
				/// Otherwise G_CONSTANT and G_GEP operands are mapped to SGPRs, G_STORE
				/// operands to VGPRs, and G_LOAD is handled by getInstrMappingForLoad. Any
				/// other opcode falls back to mapping every operand to the SGPR bank.
				RegisterBankInfo::InstructionMapping
				AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
				  RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);

				  if (Mapping.isValid())
				    return Mapping;

				  const MachineFunction &MF = *MI.getParent()->getParent();
				  const MachineRegisterInfo &MRI = MF.getRegInfo();
				  Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
				  // One slot per operand, initially null; slots are filled in by index.
				  SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());

				  switch (MI.getOpcode()) {
				  default: break;
				  case AMDGPU::G_CONSTANT: {
				    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
				    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
				    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
				    return Mapping;
				  }
				  case AMDGPU::G_GEP: {
				    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
				      // Non-register operands keep their null slot.
				      if (!MI.getOperand(i).isReg())
				        continue;

				      unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
				      OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
				    }
				    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
				    return Mapping;
				  }
				  case AMDGPU::G_STORE: {
				    assert(MI.getOperand(0).isReg());
				    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
				    // FIXME: We need to specify a different reg bank once scalar stores
				    // are supported.
				    const ValueMapping *ValMapping =
				        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
				    // FIXME: Depending on the type of store, the pointer could be in
				    // the SGPR Reg bank.
				    // FIXME: Pointer size should be based on the address space.
				    const ValueMapping *PtrMapping =
				        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);

				    OpdsMapping[0] = ValMapping;
				    OpdsMapping[1] = PtrMapping;
				    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
				    return Mapping;
				  }

				  case AMDGPU::G_LOAD:
				    return getInstrMappingForLoad(MI);
				  }

				  // Fallback for every other opcode: map all operands to the SGPR bank.
				  unsigned BankID = AMDGPU::SGPRRegBankID;

				  unsigned Size = 0;
				  for (unsigned Idx = 0, E = MI.getNumOperands(); Idx != E; ++Idx) {
				    // If the operand is not a register default to the size of the previous
				    // operand.
				    // FIXME: Can't we pull the types from the MachineInstr rather than the
				    // operands.
				    if (MI.getOperand(Idx).isReg())
				      Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
				    // OpdsMapping already holds one slot per operand, so assign in place.
				    // Appending would leave the first getNumOperands() slots null and put
				    // the real mappings past the range the InstructionMapping reads.
				    OpdsMapping[Idx] = AMDGPU::getValueMapping(BankID, Size);
				  }
				  Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));

				  return Mapping;
				}

llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBanks.td

				//=- AMDGPURegisterBank.td - Describe the AMDGPU Banks -------- tablegen --=//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//

				// The "SGPR" register bank, covering the SReg_* register classes listed
				// below.
				def SGPRRegBank : RegisterBank<"SGPR",
				[SReg_32, SReg_64, SReg_128, SReg_256, SReg_512]
				>;

				// The "VGPR" register bank, covering VGPR_32 and the VReg_* register
				// classes listed below.
				def VGPRRegBank : RegisterBank<"VGPR",
				[VGPR_32, VReg_64, VReg_96, VReg_128, VReg_256, VReg_512]
				>;

llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h

Show First 20 Lines • Show All 511 Lines • ▼ Show 20 Lines	const SITargetLowering *getTargetLowering() const override {
return &TLInfo;		return &TLInfo;
}		}

const CallLowering *getCallLowering() const override {		const CallLowering *getCallLowering() const override {
assert(GISel && "Access to GlobalISel APIs not set");		assert(GISel && "Access to GlobalISel APIs not set");
return GISel->getCallLowering();		return GISel->getCallLowering();
}		}

		// Returns the instruction selector held by the GISel accessor; asserts
		// that the accessor has been set.
		const InstructionSelector *getInstructionSelector() const override {
		assert(GISel && "Access to GlobalISel APIs not set");
		return GISel->getInstructionSelector();
		}

		// Returns the legalizer info held by the GISel accessor; asserts that
		// the accessor has been set. Marked override like the sibling GlobalISel
		// accessors so a signature mismatch with the base class is diagnosed.
		const LegalizerInfo *getLegalizerInfo() const override {
		  assert(GISel && "Access to GlobalISel APIs not set");
		  return GISel->getLegalizerInfo();
		}

		// Returns the register bank info held by the GISel accessor; asserts
		// that the accessor has been set.
		const RegisterBankInfo *getRegBankInfo() const override {
		assert(GISel && "Access to GlobalISel APIs not set");
		return GISel->getRegBankInfo();
		}

const SIRegisterInfo *getRegisterInfo() const override {		const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();		return &InstrInfo.getRegisterInfo();
}		}

void setGISelAccessor(GISelAccessor &GISel) {		void setGISelAccessor(GISelAccessor &GISel) {
this->GISel.reset(&GISel);		this->GISel.reset(&GISel);
}		}

▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Show All 10 Lines
/// \brief The AMDGPU target machine contains all of the hardware specific		/// \brief The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.		/// information needed to emit code for R600 and SI GPUs.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "AMDGPUTargetMachine.h"		#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"		#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"		#include "AMDGPUCallLowering.h"
		#include "AMDGPUInstructionSelector.h"
		#include "AMDGPULegalizerInfo.h"
		#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUTargetObjectFile.h"		#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"		#include "AMDGPUTargetTransformInfo.h"
#include "GCNSchedStrategy.h"		#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"		#include "R600MachineScheduler.h"
#include "SIMachineScheduler.h"		#include "SIMachineScheduler.h"
#include "llvm/ADT/SmallString.h"		#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"		#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/MachineScheduler.h"		#include "llvm/CodeGen/GlobalISel/Legalizer.h"
		#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"		#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"		#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/TargetRegistry.h"		#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO.h"		#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"		#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"		#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"		#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"		#include "llvm/Transforms/Scalar/GVN.h"
▲ Show 20 Lines • Show All 212 Lines • ▼ Show 20 Lines
// GCN Target Machine (SI+)		// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#ifdef LLVM_BUILD_GLOBAL_ISEL		#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {		namespace {

struct SIGISelActualAccessor : public GISelAccessor {		struct SIGISelActualAccessor : public GISelAccessor {
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;		std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
		std::unique_ptr<InstructionSelector> InstSelector;
		std::unique_ptr<LegalizerInfo> Legalizer;
		std::unique_ptr<RegisterBankInfo> RegBankInfo;
const AMDGPUCallLowering *getCallLowering() const override {		const AMDGPUCallLowering *getCallLowering() const override {
return CallLoweringInfo.get();		return CallLoweringInfo.get();
}		}
		const InstructionSelector *getInstructionSelector() const override {
		return InstSelector.get();
		}
		const LegalizerInfo *getLegalizerInfo() const override {
		return Legalizer.get();
		}
		const RegisterBankInfo *getRegBankInfo() const override {
		return RegBankInfo.get();
		}
};		};

} // end anonymous namespace		} // end anonymous namespace
#endif		#endif

GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,		GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,		StringRef CPU, StringRef FS,
TargetOptions Options,		TargetOptions Options,
Show All 17 Lines	if (!I) {
I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);		I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);

#ifndef LLVM_BUILD_GLOBAL_ISEL		#ifndef LLVM_BUILD_GLOBAL_ISEL
GISelAccessor *GISel = new GISelAccessor();		GISelAccessor *GISel = new GISelAccessor();
#else		#else
SIGISelActualAccessor *GISel = new SIGISelActualAccessor();		SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
GISel->CallLoweringInfo.reset(		GISel->CallLoweringInfo.reset(
new AMDGPUCallLowering(*I->getTargetLowering()));		new AMDGPUCallLowering(*I->getTargetLowering()));
		GISel->Legalizer.reset(new AMDGPULegalizerInfo());

		GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
		GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
		static_cast<AMDGPURegisterBankInfo>(GISel->RegBankInfo.get())));
#endif		#endif

I->setGISelAccessor(*GISel);		I->setGISelAccessor(*GISel);
}		}

I->setScalarizeGlobalBehavior(ScalarizeGlobal);		I->setScalarizeGlobalBehavior(ScalarizeGlobal);

return I.get();		return I.get();
▲ Show 20 Lines • Show All 284 Lines • ▼ Show 20 Lines

#ifdef LLVM_BUILD_GLOBAL_ISEL		#ifdef LLVM_BUILD_GLOBAL_ISEL
bool GCNPassConfig::addIRTranslator() {		bool GCNPassConfig::addIRTranslator() {
addPass(new IRTranslator());		addPass(new IRTranslator());
return false;		return false;
}		}

bool GCNPassConfig::addLegalizeMachineIR() {		bool GCNPassConfig::addLegalizeMachineIR() {
		addPass(new Legalizer());
return false;		return false;
}		}

bool GCNPassConfig::addRegBankSelect() {		bool GCNPassConfig::addRegBankSelect() {
		addPass(new RegBankSelect());
return false;		return false;
}		}

bool GCNPassConfig::addGlobalInstructionSelect() {		bool GCNPassConfig::addGlobalInstructionSelect() {
		addPass(new InstructionSelect());
return false;		return false;
}		}

#endif		#endif

void GCNPassConfig::addPreRegAlloc() {		void GCNPassConfig::addPreRegAlloc() {
addPass(createSIShrinkInstructionsPass());		addPass(createSIShrinkInstructionsPass());
addPass(createSIWholeQuadModePass());		addPass(createSIWholeQuadModePass());
}		}

void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {		void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
▲ Show 20 Lines • Show All 54 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt

	set(LLVM_TARGET_DEFINITIONS AMDGPU.td)			set(LLVM_TARGET_DEFINITIONS AMDGPU.td)

	tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)			tablegen(LLVM AMDGPUGenRegisterInfo.inc -gen-register-info)
	tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)			tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
	tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)			tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
	tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)			tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
	tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)			tablegen(LLVM AMDGPUGenSubtargetInfo.inc -gen-subtarget)
	tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)			tablegen(LLVM AMDGPUGenIntrinsics.inc -gen-tgt-intrinsic)
	tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter)			tablegen(LLVM AMDGPUGenMCCodeEmitter.inc -gen-emitter)
	tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)			tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
	tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)			tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
	tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)			tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher)
	tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)			tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
	tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering)			tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering)
				if(LLVM_BUILD_GLOBAL_ISEL)
				tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank)
				endif()
	add_public_tablegen_target(AMDGPUCommonTableGen)			add_public_tablegen_target(AMDGPUCommonTableGen)

	# List of all GlobalISel files.			# List of all GlobalISel files.
	set(GLOBAL_ISEL_FILES			set(GLOBAL_ISEL_FILES
	AMDGPUCallLowering.cpp			AMDGPUCallLowering.cpp
				AMDGPUInstructionSelector.cpp
				AMDGPULegalizerInfo.cpp
				AMDGPURegisterBankInfo.cpp
	)			)

	# Add GlobalISel files to the dependencies if the user wants to build it.			# Add GlobalISel files to the dependencies if the user wants to build it.
	if(LLVM_BUILD_GLOBAL_ISEL)			if(LLVM_BUILD_GLOBAL_ISEL)
	set(GLOBAL_ISEL_BUILD_FILES ${GLOBAL_ISEL_FILES})			set(GLOBAL_ISEL_BUILD_FILES ${GLOBAL_ISEL_FILES})
	else()			else()
	set(GLOBAL_ISEL_BUILD_FILES"")			set(GLOBAL_ISEL_BUILD_FILES"")
	set(LLVM_OPTIONAL_SOURCES LLVMGlobalISel ${GLOBAL_ISEL_FILES})			set(LLVM_OPTIONAL_SOURCES LLVMGlobalISel ${GLOBAL_ISEL_FILES})
	▲ Show 20 Lines • Show All 69 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

	//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//			//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
	//			//
	// The LLVM Compiler Infrastructure			// The LLVM Compiler Infrastructure
	//			//
	// This file is distributed under the University of Illinois Open Source			// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.			// License. See LICENSE.TXT for details.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	#include "AMDGPUBaseInfo.h"			#include "AMDGPUBaseInfo.h"
	#include "AMDGPU.h"			#include "AMDGPU.h"
	#include "SIDefines.h"			#include "SIDefines.h"
	#include "llvm/CodeGen/MachineMemOperand.h"			#include "llvm/CodeGen/MachineMemOperand.h"
	#include "llvm/IR/LLVMContext.h"
	#include "llvm/IR/Constants.h"			#include "llvm/IR/Constants.h"
	#include "llvm/IR/Function.h"			#include "llvm/IR/Function.h"
	#include "llvm/IR/GlobalValue.h"			#include "llvm/IR/GlobalValue.h"
				#include "llvm/IR/LLVMContext.h"
	#include "llvm/MC/MCContext.h"			#include "llvm/MC/MCContext.h"
	#include "llvm/MC/MCInstrInfo.h"			#include "llvm/MC/MCInstrInfo.h"
	#include "llvm/MC/MCRegisterInfo.h"			#include "llvm/MC/MCRegisterInfo.h"
	#include "llvm/MC/MCSectionELF.h"			#include "llvm/MC/MCSectionELF.h"
	#include "llvm/MC/MCSubtargetInfo.h"			#include "llvm/MC/MCSubtargetInfo.h"
	#include "llvm/MC/SubtargetFeature.h"			#include "llvm/MC/SubtargetFeature.h"

	#define GET_SUBTARGETINFO_ENUM			#define GET_SUBTARGETINFO_ENUM
	▲ Show 20 Lines • Show All 476 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir

				# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - \| FileCheck %s -check-prefixes=GCN
				# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - \| FileCheck %s -check-prefixes=GCN

				--- \|
				define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
				...
				---

				name: global_addrspace
				legalized: true
				regBankSelected: true

				# GCN: global_addrspace
				# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
				# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0

				body: \|
				bb.0:
				liveins: %vgpr0_vgpr1

				%0:vgpr(p1) = COPY %vgpr0_vgpr1
				%1:vgpr(s32) = G_LOAD %0 :: (load 4 from %ir.global0)

				...
				---

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir

				# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - \| FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
				# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - \| FileCheck %s -check-prefixes=GCN,CI,SICI
				# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - \| FileCheck %s -check-prefixes=GCN,VI,SIVI

				--- \|
				define void @smrd_imm(i32 addrspace(2)* %const0) { ret void }
				...
				---

				name: smrd_imm
				legalized: true
				regBankSelected: true

				# GCN: body:
				# GCN: [[PTR:%[0-9]+]] = COPY %sgpr0_sgpr1

				# Immediate offset:
				# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
				# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0

				# Max immediate offset for SI
				# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
				# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0

				# Immediate overflow for SI
				# FIXME: The immediate gets selected twice, once into the
				# S_LOAD_DWORD instruction and once just as a normal constant.
				# SI: S_MOV_B32 1024
				# SI: [[K1024:%[0-9]+]] = S_MOV_B32 1024
				# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
				# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
				# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0

				# Max immediate offset for VI
				# SI: S_MOV_B32 1048572
				# SI: [[K1048572:%[0-9]+]] = S_MOV_B32 1048572
				# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
				# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572

				#
				# Immediate overflow for VI
				# FIXME: The immediate gets selected twice, once into the
				# S_LOAD_DWORD instruction and once just as a normal constant.
				# SIVI: S_MOV_B32 1048576
				# SIVI: [[K1048576:%[0-9]+]] = S_MOV_B32 1048576
				# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
				# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0

				# Max immediate for CI
				# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 4294967292
				# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 3
				# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
				# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
				# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
				# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
				# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
				# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
				# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
				# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
				# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
				# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0

				# Immediate overflow for CI
				# GCN: [[K_LO:%[0-9]+]] = S_MOV_B32 0
				# GCN: [[K_HI:%[0-9]+]] = S_MOV_B32 4
				# GCN: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
				# GCN: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
				# GCN: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
				# GCN: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
				# GCN: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
				# GCN: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
				# GCN: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
				# GCN: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
				# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0

				# Max 32-bit byte offset
				# FIXME: The immediate gets selected twice, once into the
				# S_LOAD_DWORD instruction and once just as a normal constant.
				# SIVI: S_MOV_B32 4294967292
				# SIVI: [[K4294967292:%[0-9]+]] = S_MOV_B32 4294967292
				# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
				# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0

				# Overflow 32-bit byte offset
				# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 0
				# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 1
				# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2
				# SIVI: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0
				# SIVI: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0
				# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
				# SIVI: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1
				# SIVI: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1
				# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
				# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2
				# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
				# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0

				body: \|
				bb.0:
				liveins: %sgpr0_sgpr1

				%0:sgpr(p2) = COPY %sgpr0_sgpr1

				%1:sgpr(s64) = G_CONSTANT i64 4
				%2:sgpr(p2) = G_GEP %0, %1
				%3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0)

				%4:sgpr(s64) = G_CONSTANT i64 1020
				%5:sgpr(p2) = G_GEP %0, %4
				%6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0)

				%7:sgpr(s64) = G_CONSTANT i64 1024
				%8:sgpr(p2) = G_GEP %0, %7
				%9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0)

				%10:sgpr(s64) = G_CONSTANT i64 1048572
				%11:sgpr(p2) = G_GEP %0, %10
				%12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0)

				%13:sgpr(s64) = G_CONSTANT i64 1048576
				%14:sgpr(p2) = G_GEP %0, %13
				%15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0)

				%16:sgpr(s64) = G_CONSTANT i64 17179869180
				%17:sgpr(p2) = G_GEP %0, %16
				%18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0)

				%19:sgpr(s64) = G_CONSTANT i64 17179869184
				%20:sgpr(p2) = G_GEP %0, %19
				%21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0)

				%22:sgpr(s64) = G_CONSTANT i64 4294967292
				%23:sgpr(p2) = G_GEP %0, %22
				%24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0)

				%25:sgpr(s64) = G_CONSTANT i64 4294967296
				%26:sgpr(p2) = G_GEP %0, %25
				%27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0)

				...
				---

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir

				# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - \| FileCheck %s -check-prefixes=GCN
				# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - \| FileCheck %s -check-prefixes=GCN

				--- \|
				define void @global_addrspace(i32 addrspace(1)* %global0) { ret void }
				...
				---

				name: global_addrspace
				legalized: true
				regBankSelected: true

				# GCN: global_addrspace
				# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1
				# GCN: [[VAL:%[0-9]+]] = COPY %vgpr2
				# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0

				body: \|
				bb.0:
				liveins: %vgpr0_vgpr1, %vgpr2

				%0:vgpr(p1) = COPY %vgpr0_vgpr1
				%1:vgpr(s32) = COPY %vgpr2
				G_STORE %1, %0 :: (store 4 into %ir.global0)

				...
				---

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir

				# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=regbankselect -global-isel %s -verify-machineinstrs -o - \| FileCheck %s

				--- |
				  ; Embedded LLVM IR module. Each function supplies the IR value that the
				  ; same-named MIR function references through its %ir.* memory operand.

				  ; Pointer in addrspace(2); referenced as %ir.ptr0.
				  define void @load_constant(i32 addrspace(2)* %ptr0) { ret void }

				  ; Load straight from the addrspace(1) argument; referenced as %ir.ptr1.
				  define void @load_global_uniform(i32 addrspace(1)* %ptr1) {
				    %tmp0 = load i32, i32 addrspace(1)* %ptr1
				    ret void
				  }

				  ; The address is indexed by the workitem id; referenced as %ir.tmp1.
				  define void @load_global_non_uniform(i32 addrspace(1)* %ptr2) {
				    %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
				    %tmp1 = getelementptr i32, i32 addrspace(1)* %ptr2, i32 %tmp0
				    %tmp2 = load i32, i32 addrspace(1)* %tmp1
				    ret void
				  }

				  declare i32 @llvm.amdgcn.workitem.id.x() #0
				  attributes #0 = { nounwind readnone }
				...

				---
				# 32-bit G_LOAD through a p2 pointer copied from %sgpr0_sgpr1. The check
				# lines expect both virtual registers to be assigned the sgpr bank.
				name: load_constant
				legalized: true

				# CHECK-LABEL: name: load_constant
				# CHECK: registers:
				# CHECK: - { id: 0, class: sgpr }
				# CHECK: - { id: 1, class: sgpr }

				body: |
				  bb.0:
				    liveins: %sgpr0_sgpr1
				    %0:_(p2) = COPY %sgpr0_sgpr1
				    %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr0)
				...

				---
				# 32-bit G_LOAD through a p1 pointer copied from %sgpr0_sgpr1, where the
				# IR address is the unmodified function argument. The check lines expect
				# both virtual registers to be assigned the sgpr bank.
				name: load_global_uniform
				legalized: true

				# CHECK-LABEL: name: load_global_uniform
				# CHECK: registers:
				# CHECK: - { id: 0, class: sgpr }
				# CHECK: - { id: 1, class: sgpr }

				body: |
				  bb.0:
				    liveins: %sgpr0_sgpr1
				    %0:_(p1) = COPY %sgpr0_sgpr1
				    %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.ptr1)
				...

				---
				# 32-bit G_LOAD through a p1 pointer whose IR address (%ir.tmp1) is
				# indexed by the workitem id. The check lines expect register 0 in the
				# sgpr bank and registers 1 and 2 in the vgpr bank.
				# NOTE(review): the body only defines %0 and %1, so register 2 is
				# presumably a copy created by the pass under test -- confirm.
				name: load_global_non_uniform
				legalized: true

				# CHECK-LABEL: name: load_global_non_uniform
				# CHECK: registers:
				# CHECK: - { id: 0, class: sgpr }
				# CHECK: - { id: 1, class: vgpr }
				# CHECK: - { id: 2, class: vgpr }

				body: |
				  bb.0:
				    liveins: %sgpr0_sgpr1
				    %0:_(p1) = COPY %sgpr0_sgpr1
				    %1:_(s32) = G_LOAD %0 :: (load 4 from %ir.tmp1)
				...

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll

				; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=GCN %s

				; Minimal amdgpu_vs shader whose body is a bare return. The generated
				; code is expected to contain s_endpgm.
				; GCN-LABEL: vs_epilog
				; GCN: s_endpgm

				define amdgpu_vs void @vs_epilog() {
				main_body:
				  ret void
				}

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll

				; FIXME: Need to add support for mubuf stores to enable this on SI.
				; XUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
				; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=CI --check-prefix=GCN %s
				; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s

				; Scalar (SMRD) load whose address offset is encoded as an immediate:
				; the loaded address is %ptr advanced by one i32 element.
				; NOTE(review): none of the enabled llc invocations at the top of this
				; file passes the SICI prefix, so the SICI line below is currently not
				; checked -- confirm whether the bonaire invocation should enable it.
				; GCN-LABEL: {{^}}smrd0:
				; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
				; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
				define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
				entry:
				  %addr = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
				  %val = load i32, i32 addrspace(2)* %addr
				  store i32 %val, i32 addrspace(1)* %out
				  ret void
				}

				; Scalar (SMRD) load using the largest offset that still fits the
				; immediate field: %ptr advanced by 255 i32 elements.
				; NOTE(review): none of the enabled llc invocations at the top of this
				; file passes the SICI prefix, so the SICI line below is currently not
				; checked -- confirm whether the bonaire invocation should enable it.
				; GCN-LABEL: {{^}}smrd1:
				; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
				; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
				define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
				entry:
				  %addr = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
				  %val = load i32, i32 addrspace(2)* %addr
				  store i32 %val, i32 addrspace(1)* %out
				  ret void
				}

				; Scalar (SMRD) load one element past the largest immediate offset:
				; %ptr advanced by 256 i32 elements. The SI lines expect the offset to be
				; materialised in a register first; the other targets expect an
				; immediate operand.
				; GCN-LABEL: {{^}}smrd2:
				; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
				; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
				; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
				; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
				; GCN: s_endpgm
				define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
				entry:
				  %addr = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
				  %val = load i32, i32 addrspace(2)* %addr
				  store i32 %val, i32 addrspace(1)* %out
				  ret void
				}

				; Scalar (SMRD) load whose element index needs more than 32 bits.
				; Only the label is checked at present; the XSI and XGCN lines are
				; disabled checks.
				; GCN-LABEL: {{^}}smrd3:
				; FIXME: Immediates are not folded through REG_SEQUENCE, which leaves
				; too many copies in the output.
				; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b
				; TODO: Add VI checks
				; XGCN: s_endpgm
				define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
				entry:
				  %addr = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; index is 2 ^ 32
				  %val = load i32, i32 addrspace(2)* %addr
				  store i32 %val, i32 addrspace(1)* %out
				  ret void
				}

				; Scalar (SMRD) load using the largest immediate offset available on VI:
				; %ptr advanced by 262143 i32 elements. The SI lines expect the offset in
				; a register; the CI and VI lines expect an immediate.
				; GCN-LABEL: {{^}}smrd4:
				; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
				; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
				; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
				; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
				define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
				entry:
				  %addr = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
				  %val = load i32, i32 addrspace(2)* %addr
				  store i32 %val, i32 addrspace(1)* %out
				  ret void
				}

				; Scalar (SMRD) load one element past the largest VI immediate offset:
				; %ptr advanced by 262144 i32 elements. The SIVI lines expect the offset
				; in a register; the CI line still expects an immediate.
				; GCN-LABEL: {{^}}smrd5:
				; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
				; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
				; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
				; GCN: s_endpgm
				define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
				entry:
				  %addr = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
				  %val = load i32, i32 addrspace(2)* %addr
				  store i32 %val, i32 addrspace(1)* %out
				  ret void
				}

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/GlobalISel: Add support for simple shaders
Closed, Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 86301

llvm/trunk/lib/Target/AMDGPU/AMDGPU.td

llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h

llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def

llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h

llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBanks.td

llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h

llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt

llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU/GlobalISel: Add support for simple shaders (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 86301

llvm/trunk/lib/Target/AMDGPU/AMDGPU.td

llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h

llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def

llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h

llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterBanks.td

llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h

llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt

llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/shader-epilogs.ll

llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/smrd.ll

AMDGPU/GlobalISel: Add support for simple shaders
Closed, Public