Diff 58522

lib/Target/NVPTX/CMakeLists.txt

Show All 26 Lines	set(NVPTXCodeGen_sources
NVPTXMCExpr.cpp		NVPTXMCExpr.cpp
NVPTXPrologEpilogPass.cpp		NVPTXPrologEpilogPass.cpp
NVPTXRegisterInfo.cpp		NVPTXRegisterInfo.cpp
NVPTXReplaceImageHandles.cpp		NVPTXReplaceImageHandles.cpp
NVPTXSubtarget.cpp		NVPTXSubtarget.cpp
NVPTXTargetMachine.cpp		NVPTXTargetMachine.cpp
NVPTXTargetTransformInfo.cpp		NVPTXTargetTransformInfo.cpp
NVPTXUtilities.cpp		NVPTXUtilities.cpp
		NVVMIntrRange.cpp
NVVMReflect.cpp		NVVMReflect.cpp
)		)

add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})		add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})

add_subdirectory(TargetInfo)		add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)		add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)		add_subdirectory(MCTargetDesc)

lib/Target/NVPTX/NVPTX.h

	Show First 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
	}			}

	FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,			FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
	llvm::CodeGenOpt::Level OptLevel);			llvm::CodeGenOpt::Level OptLevel);
	ModulePass *createNVPTXAssignValidGlobalNamesPass();			ModulePass *createNVPTXAssignValidGlobalNamesPass();
	ModulePass *createGenericToNVVMPass();			ModulePass *createGenericToNVVMPass();
	FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();			FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
	FunctionPass *createNVPTXInferAddressSpacesPass();			FunctionPass *createNVPTXInferAddressSpacesPass();
				FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
	FunctionPass *createNVVMReflectPass();			FunctionPass *createNVVMReflectPass();
	FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);			FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
	MachineFunctionPass *createNVPTXPrologEpilogPass();			MachineFunctionPass *createNVPTXPrologEpilogPass();
	MachineFunctionPass *createNVPTXReplaceImageHandlesPass();			MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
	FunctionPass *createNVPTXImageOptimizerPass();			FunctionPass *createNVPTXImageOptimizerPass();
	FunctionPass createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine TM);			FunctionPass createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine TM);
	BasicBlockPass *createNVPTXLowerAllocaPass();			BasicBlockPass *createNVPTXLowerAllocaPass();
	MachineFunctionPass *createNVPTXPeephole();			MachineFunctionPass *createNVPTXPeephole();
	▲ Show 20 Lines • Show All 121 Lines • Show Last 20 Lines

lib/Target/NVPTX/NVPTXTargetMachine.cpp

	Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines
	using namespace llvm;			using namespace llvm;

	static cl::opt<bool> UseInferAddressSpaces(			static cl::opt<bool> UseInferAddressSpaces(
	"nvptx-use-infer-addrspace", cl::init(false), cl::Hidden,			"nvptx-use-infer-addrspace", cl::init(false), cl::Hidden,
	cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of "			cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of "
	"NVPTXFavorNonGenericAddrSpaces"));			"NVPTXFavorNonGenericAddrSpaces"));

	namespace llvm {			namespace llvm {
				void initializeNVVMIntrRangePass(PassRegistry&);
	void initializeNVVMReflectPass(PassRegistry&);			void initializeNVVMReflectPass(PassRegistry&);
	void initializeGenericToNVVMPass(PassRegistry&);			void initializeGenericToNVVMPass(PassRegistry&);
	void initializeNVPTXAllocaHoistingPass(PassRegistry &);			void initializeNVPTXAllocaHoistingPass(PassRegistry &);
	void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);			void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
	void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);			void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
	void initializeNVPTXInferAddressSpacesPass(PassRegistry &);			void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
	void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);			void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
	void initializeNVPTXLowerKernelArgsPass(PassRegistry &);			void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
	void initializeNVPTXLowerAllocaPass(PassRegistry &);			void initializeNVPTXLowerAllocaPass(PassRegistry &);
	}			}

	extern "C" void LLVMInitializeNVPTXTarget() {			extern "C" void LLVMInitializeNVPTXTarget() {
	// Register the target.			// Register the target.
	RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);			RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
	RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);			RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);

	// FIXME: This pass is really intended to be invoked during IR optimization,			// FIXME: This pass is really intended to be invoked during IR optimization,
	// but it's very NVPTX-specific.			// but it's very NVPTX-specific.
	PassRegistry &PR = *PassRegistry::getPassRegistry();			PassRegistry &PR = *PassRegistry::getPassRegistry();
	initializeNVVMReflectPass(PR);			initializeNVVMReflectPass(PR);
				initializeNVVMIntrRangePass(PR);
	initializeGenericToNVVMPass(PR);			initializeGenericToNVVMPass(PR);
	initializeNVPTXAllocaHoistingPass(PR);			initializeNVPTXAllocaHoistingPass(PR);
	initializeNVPTXAssignValidGlobalNamesPass(PR);			initializeNVPTXAssignValidGlobalNamesPass(PR);
	initializeNVPTXFavorNonGenericAddrSpacesPass(PR);			initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
	initializeNVPTXInferAddressSpacesPass(PR);			initializeNVPTXInferAddressSpacesPass(PR);
	initializeNVPTXLowerKernelArgsPass(PR);			initializeNVPTXLowerKernelArgsPass(PR);
	initializeNVPTXLowerAllocaPass(PR);			initializeNVPTXLowerAllocaPass(PR);
	initializeNVPTXLowerAggrCopiesPass(PR);			initializeNVPTXLowerAggrCopiesPass(PR);
	▲ Show 20 Lines • Show All 85 Lines • ▼ Show 20 Lines
	} // end anonymous namespace			} // end anonymous namespace

	TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {			TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
	return new NVPTXPassConfig(this, PM);			return new NVPTXPassConfig(this, PM);
	}			}

	void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {			void NVPTXTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
	PM.add(createNVVMReflectPass());			PM.add(createNVVMReflectPass());
				PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
				jlebarUnsubmitted Not Done Reply Inline Actions I have no idea if this is the right way to pass this information to the pass. Seems reasonable to me, though. jlebar: I have no idea if this is the right way to pass this information to the pass. Seems reasonable…
	}			}

	TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {			TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
	return TargetIRAnalysis([this](const Function &F) {			return TargetIRAnalysis([this](const Function &F) {
	return TargetTransformInfo(NVPTXTTIImpl(this, F));			return TargetTransformInfo(NVPTXTTIImpl(this, F));
	});			});
	}			}

	▲ Show 20 Lines • Show All 188 Lines • Show Last 20 Lines

lib/Target/NVPTX/NVVMIntrRange.cpp

This file was added.

				//===- NVVMIntrRange.cpp - Set !range metadata for NVVM intrinsics --------===//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				//
				// This pass adds appropriate !range metadata for calls to NVVM
				// intrinsics that return a limited range of values.
				jlebarUnsubmitted Done Reply Inline Actions calls to jlebar: calls to
				//
				jlebarUnsubmitted Done Reply Inline Actions a limited range jlebar: a limited range
				//===----------------------------------------------------------------------===//

				#include "NVPTX.h"
				#include "llvm/IR/Constants.h"
				#include "llvm/IR/InstIterator.h"
				#include "llvm/IR/Intrinsics.h"
				#include "llvm/IR/Instructions.h"

				using namespace llvm;

				#define DEBUG_TYPE "nvvm-intr-range"

				// Forward declaration of the pass initializer that the NVVMIntrRange
				// constructor calls with the global PassRegistry.
				namespace llvm { void initializeNVVMIntrRangePass(PassRegistry &); }

				// Add !range metadata based on limits of given SM variant.
				// Hidden command-line option "nvvm-intr-range-sm" (initial value 20). Its
				// value is the SM version used when the pass is default-constructed.
				static cl::opt<unsigned> NVVMIntrRangeSM("nvvm-intr-range-sm", cl::init(20),
				cl::Hidden, cl::desc("SM variant"));

				namespace {
				// Function pass that attaches !range metadata to calls of the PTX/NVVM
				// special-register intrinsics handled in runOnFunction.
				class NVVMIntrRange : public FunctionPass {
				private:
				// Per-dimension limits. runOnFunction uses each value as the exclusive
				// upper bound for the matching index intrinsic (tid / ctaid) and
				// value + 1 as the exclusive upper bound for the matching size intrinsic
				// (ntid / nctaid).
				struct {
				unsigned x, y, z;
				} MaxBlockSize, MaxGridSize;

				jlebarUnsubmitted Done Reply Inline Actions I think at least a comment, and possibly more descriptive names (blockMaxes / gridMaxes?) would be helpful to readers. Actually, since you don't use the fact that these are structs at all, maybe just six member variables would be clearer. maxBlockX, maxThreadZ, etc. What do you think? jlebar: I think at least a comment, and possibly more descriptive names (blockMaxes / gridMaxes?) would…
				jingyueUnsubmitted Not Done Reply Inline Actions Maybe name them BlockDim and GridDim jingyue: Maybe name them BlockDim and GridDim
				public:
				traAuthorUnsubmitted Not Done Reply Inline Actions I've made names more descriptive. IMO struct fits quite well for describing dimensions/indexes of 3d grid. tra: I've made names more descriptive. IMO struct fits quite well for describing dimensions/indexes…
				// Static pass identifier handed to the FunctionPass base constructor.
				static char ID;
				// Default construction takes the SM version from the
				// -nvvm-intr-range-sm option.
				NVVMIntrRange() : NVVMIntrRange(NVVMIntrRangeSM) {}
				// Block limits are fixed at 1024 x 1024 x 64. The grid x limit is
				// 0x7fffffff when SmVersion >= 30 and 0xffff otherwise; the grid y and z
				// limits are always 0xffff.
				NVVMIntrRange(unsigned int SmVersion)
				: FunctionPass(ID), MaxBlockSize{1024, 1024, 64},
				MaxGridSize{SmVersion >= 30 ? 0x7fffffffu : 0xffffu, 0xffff, 0xffff} {
				initializeNVVMIntrRangePass(*PassRegistry::getPassRegistry());
				}

				// Annotates the matching intrinsic calls in the given function; the
				// result reports whether any call was annotated.
				bool runOnFunction(Function &) override;
				};
				}

				// Factory for the NVVMIntrRange pass. SmVersion selects the grid-size
				// limits the pass will encode; the returned object is newly allocated.
				FunctionPass *llvm::createNVVMIntrRangePass(unsigned int SmVersion) {
				  FunctionPass *RangePass = new NVVMIntrRange(SmVersion);
				  return RangePass;
				}

				// Definition of the static pass identifier that the constructor hands to
				// FunctionPass(ID).
				char NVVMIntrRange::ID = 0;
				// Registers the pass with the argument string "nvvm-intr-range".
				// NOTE(review): presumably this macro also supplies the definition of
				// initializeNVVMIntrRangePass() that is forward-declared earlier in this
				// file -- confirm against INITIALIZE_PASS in llvm/PassSupport.h.
				INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range",
				"Add !range metadata to NVVM intrinsics.", false, false)

				// Adds the passed-in [Low,High) range information as metadata to the
				// passed-in call instruction.
				static bool addRangeMetadata(int Low, int High, CallInst *C) {
				jlebarUnsubmitted Done Reply Inline Actions Maybe indicate that the range is [Low, High)? jlebar: Maybe indicate that the range is [Low, High)?
				jlebarUnsubmitted Done Reply Inline Actions MaxGridSize.x is INT_MAX for sm_30+. Then we add 1 to it, and store in a signed int... jlebar: MaxGridSize.x is INT_MAX for sm_30+. Then we add 1 to it, and store in a signed int...
				traAuthorUnsubmitted Not Done Reply Inline Actions Changed arg type to uint64_t. As for the range itself, it apparently allows wrapping, so exclusive high boundary wrapped to negative value should be OK. In any case we don't have any other way to encode it considering that range values must be the same as return type which is i32 in this case. tra: Changed arg type to uint64_t. As for the range itself, it apparently allows wrapping, so…
				LLVMContext &Context = C->getParent()->getContext();
				jingyueUnsubmitted Done Reply Inline Actions Can you comment on whether the range is [Low, High) or [Low, High]? jingyue: Can you comment on whether the range is [Low, High) or [Low, High]?
				IntegerType *Int32Ty = Type::getInt32Ty(Context);
				Metadata *LowAndHigh[] = {
				ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Low)),
				ConstantAsMetadata::get(ConstantInt::get(Int32Ty, High))};
				C->setMetadata(LLVMContext::MD_range, MDNode::get(Context, LowAndHigh));
				jlebarUnsubmitted Done Reply Inline Actions We're already using namespace llvm, so don't need "llvm::" here. jlebar: We're already using namespace llvm, so don't need "llvm::" here.
				return true;
				}

				// Walks every instruction of F and, for each direct call to one of the
				// PTX/NVVM special-register intrinsics listed below, attaches !range
				// metadata through addRangeMetadata.
				//
				// Bound scheme (all ranges are half-open, [Low, High)):
				//   - index intrinsics (tid / ctaid): [0, Max)
				//   - size intrinsics (ntid / nctaid): [1, Max + 1)
				// where Max is the matching MaxBlockSize / MaxGridSize component.
				//
				// Returns true if any call was annotated.
				bool NVVMIntrRange::runOnFunction(Function &F) {
				  bool Changed = false;

				  // Go through the calls in this function.
				  for (Instruction &I : instructions(F)) {
				    CallInst *Call = dyn_cast<CallInst>(&I);
				    if (!Call)
				      continue;

				    // Calls without a statically known callee cannot be matched.
				    Function *Callee = Call->getCalledFunction();
				    if (!Callee)
				      continue;

				    switch (Callee->getIntrinsicID()) {
				    // Index within block
				    case Intrinsic::ptx_read_tid_x:
				    case Intrinsic::nvvm_read_ptx_sreg_tid_x:
				      Changed |= addRangeMetadata(0, MaxBlockSize.x, Call);
				      break;
				    case Intrinsic::ptx_read_tid_y:
				    case Intrinsic::nvvm_read_ptx_sreg_tid_y:
				      Changed |= addRangeMetadata(0, MaxBlockSize.y, Call);
				      break;
				    case Intrinsic::ptx_read_tid_z:
				    case Intrinsic::nvvm_read_ptx_sreg_tid_z:
				      Changed |= addRangeMetadata(0, MaxBlockSize.z, Call);
				      break;

				    // Block size
				    case Intrinsic::ptx_read_ntid_x:
				    case Intrinsic::nvvm_read_ptx_sreg_ntid_x:
				      Changed |= addRangeMetadata(1, MaxBlockSize.x + 1, Call);
				      break;
				    case Intrinsic::ptx_read_ntid_y:
				    case Intrinsic::nvvm_read_ptx_sreg_ntid_y:
				      Changed |= addRangeMetadata(1, MaxBlockSize.y + 1, Call);
				      break;
				    case Intrinsic::ptx_read_ntid_z:
				    case Intrinsic::nvvm_read_ptx_sreg_ntid_z:
				      Changed |= addRangeMetadata(1, MaxBlockSize.z + 1, Call);
				      break;

				    // Index within grid
				    case Intrinsic::ptx_read_ctaid_x:
				    case Intrinsic::nvvm_read_ptx_sreg_ctaid_x:
				      Changed |= addRangeMetadata(0, MaxGridSize.x, Call);
				      break;
				    case Intrinsic::ptx_read_ctaid_y:
				    case Intrinsic::nvvm_read_ptx_sreg_ctaid_y:
				      Changed |= addRangeMetadata(0, MaxGridSize.y, Call);
				      break;
				    case Intrinsic::ptx_read_ctaid_z:
				    case Intrinsic::nvvm_read_ptx_sreg_ctaid_z:
				      Changed |= addRangeMetadata(0, MaxGridSize.z, Call);
				      break;

				    // Grid size
				    case Intrinsic::ptx_read_nctaid_x:
				    case Intrinsic::nvvm_read_ptx_sreg_nctaid_x:
				      Changed |= addRangeMetadata(1, MaxGridSize.x + 1, Call);
				      break;
				    case Intrinsic::ptx_read_nctaid_y:
				    case Intrinsic::nvvm_read_ptx_sreg_nctaid_y:
				      Changed |= addRangeMetadata(1, MaxGridSize.y + 1, Call);
				      break;
				    case Intrinsic::ptx_read_nctaid_z:
				    case Intrinsic::nvvm_read_ptx_sreg_nctaid_z:
				      Changed |= addRangeMetadata(1, MaxGridSize.z + 1, Call);
				      break;

				    // warp size is constant 32.
				    case Intrinsic::nvvm_read_ptx_sreg_warpsize:
				      Changed |= addRangeMetadata(32, 32 + 1, Call);
				      break;

				    // Lane ID is [0..warpsize)
				    case Intrinsic::ptx_read_laneid:
				      Changed |= addRangeMetadata(0, 32, Call);
				      break;

				    // Any other callee is left untouched.
				    default:
				      break;
				    }
				  }

				  return Changed;
				}

test/CodeGen/NVPTX/intrinsic-old.ll

	; RUN: llc < %s -march=nvptx -mcpu=sm_20 \| FileCheck %s			; RUN: llc < %s -march=nvptx -mcpu=sm_20 \| FileCheck %s
	; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 \| FileCheck %s			; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 \| FileCheck %s
				; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -nvvm-intr-range \
				; RUN: \| FileCheck --check-prefix=RANGE --check-prefix=RANGE_20 %s
				; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda \
				; RUN: -nvvm-intr-range -nvvm-intr-range-sm=30 \
				; RUN: \| FileCheck --check-prefix=RANGE --check-prefix=RANGE_30 %s

	define ptx_device i32 @test_tid_x() {			define ptx_device i32 @test_tid_x() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;			; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
				; RANGE: call i32 @llvm.ptx.read.tid.x(), !range ![[BLK_IDX_XY:[0-9]+]]
				jlebarUnsubmitted Done Reply Inline Actions Probably shouldn't do it in this patch, but FYI if you add CHECK-LABEL: @foo in front of each function, that will make it a lot easier to debug if it fails. CHECK-LABEL essentially partitions the checks. Although I think it applies only to CHECKs, not e.g. RANGEs. jlebar: Probably shouldn't do it in this patch, but FYI if you add CHECK-LABEL: @foo in front of each…
	; CHECK: ret;			; CHECK: ret;
				traAuthorUnsubmitted Not Done Reply Inline Actions Having -LABEL does not matter much in this case as each function checks for its own unique register and that applies to both CHECK and RANGE. tra: Having -LABEL does not matter much in this case as each function checks for its own unique…
	%x = call i32 @llvm.ptx.read.tid.x()			%x = call i32 @llvm.ptx.read.tid.x()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_tid_y() {			define ptx_device i32 @test_tid_y() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;			; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
				; RANGE: call i32 @llvm.ptx.read.tid.y(), !range ![[BLK_IDX_XY]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.tid.y()			%x = call i32 @llvm.ptx.read.tid.y()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_tid_z() {			define ptx_device i32 @test_tid_z() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;			; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
				; RANGE: call i32 @llvm.ptx.read.tid.z(), !range ![[BLK_IDX_Z:[0-9]+]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.tid.z()			%x = call i32 @llvm.ptx.read.tid.z()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_tid_w() {			define ptx_device i32 @test_tid_w() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;			; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.tid.w()			%x = call i32 @llvm.ptx.read.tid.w()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_ntid_x() {			define ptx_device i32 @test_ntid_x() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;			; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
				; RANGE: call i32 @llvm.ptx.read.ntid.x(), !range ![[BLK_SIZE_XY:[0-9]+]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.ntid.x()			%x = call i32 @llvm.ptx.read.ntid.x()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_ntid_y() {			define ptx_device i32 @test_ntid_y() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;			; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
				; RANGE: call i32 @llvm.ptx.read.ntid.y(), !range ![[BLK_SIZE_XY]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.ntid.y()			%x = call i32 @llvm.ptx.read.ntid.y()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_ntid_z() {			define ptx_device i32 @test_ntid_z() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;			; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
				; RANGE: call i32 @llvm.ptx.read.ntid.z(), !range ![[BLK_SIZE_Z:[0-9]+]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.ntid.z()			%x = call i32 @llvm.ptx.read.ntid.z()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_ntid_w() {			define ptx_device i32 @test_ntid_w() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;			; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.ntid.w()			%x = call i32 @llvm.ptx.read.ntid.w()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_laneid() {			define ptx_device i32 @test_laneid() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;			; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
				; RANGE: call i32 @llvm.ptx.read.laneid(), !range ![[LANEID:[0-9]+]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.laneid()			%x = call i32 @llvm.ptx.read.laneid()
	ret i32 %x			ret i32 %x
	}			}

				define ptx_device i32 @test_warpsize() {
				; CHECK: mov.u32 %r{{[0-9]+}}, WARP_SZ;
				; RANGE: call i32 @llvm.nvvm.read.ptx.sreg.warpsize(), !range ![[WARPSIZE:[0-9]+]]
				; CHECK: ret;
				%x = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
				ret i32 %x
				}

	define ptx_device i32 @test_warpid() {			define ptx_device i32 @test_warpid() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;			; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.warpid()			%x = call i32 @llvm.ptx.read.warpid()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_nwarpid() {			define ptx_device i32 @test_nwarpid() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;			; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.nwarpid()			%x = call i32 @llvm.ptx.read.nwarpid()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_ctaid_x() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
	; CHECK: ret;
	%x = call i32 @llvm.ptx.read.ctaid.x()
	ret i32 %x
	}

	define ptx_device i32 @test_ctaid_y() {			define ptx_device i32 @test_ctaid_y() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;			; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
				; RANGE: call i32 @llvm.ptx.read.ctaid.y(), !range ![[GRID_IDX_YZ:[0-9]+]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.ctaid.y()			%x = call i32 @llvm.ptx.read.ctaid.y()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_ctaid_z() {			define ptx_device i32 @test_ctaid_z() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;			; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
				; RANGE: call i32 @llvm.ptx.read.ctaid.z(), !range ![[GRID_IDX_YZ]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.ctaid.z()			%x = call i32 @llvm.ptx.read.ctaid.z()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_ctaid_w() {			define ptx_device i32 @test_ctaid_x() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;			; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
				; RANGE_30: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_X:[0-9]+]]
				; RANGE_20: call i32 @llvm.ptx.read.ctaid.x(), !range ![[GRID_IDX_YZ]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.ctaid.w()			%x = call i32 @llvm.ptx.read.ctaid.x()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_nctaid_x() {			define ptx_device i32 @test_ctaid_w() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;			; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.nctaid.x()			%x = call i32 @llvm.ptx.read.ctaid.w()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_nctaid_y() {			define ptx_device i32 @test_nctaid_y() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;			; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
				; RANGE: call i32 @llvm.ptx.read.nctaid.y(), !range ![[GRID_SIZE_YZ:[0-9]+]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.nctaid.y()			%x = call i32 @llvm.ptx.read.nctaid.y()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_nctaid_z() {			define ptx_device i32 @test_nctaid_z() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;			; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
				; RANGE: call i32 @llvm.ptx.read.nctaid.z(), !range ![[GRID_SIZE_YZ]]
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.nctaid.z()			%x = call i32 @llvm.ptx.read.nctaid.z()
	ret i32 %x			ret i32 %x
	}			}

				define ptx_device i32 @test_nctaid_x() {
				; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
				; RANGE_30: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_X:[0-9]+]]
				; RANGE_20: call i32 @llvm.ptx.read.nctaid.x(), !range ![[GRID_SIZE_YZ]]
				; CHECK: ret;
				%x = call i32 @llvm.ptx.read.nctaid.x()
				ret i32 %x
				}


	define ptx_device i32 @test_nctaid_w() {			define ptx_device i32 @test_nctaid_w() {
	; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;			; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
	; CHECK: ret;			; CHECK: ret;
	%x = call i32 @llvm.ptx.read.nctaid.w()			%x = call i32 @llvm.ptx.read.nctaid.w()
	ret i32 %x			ret i32 %x
	}			}

	define ptx_device i32 @test_smid() {			define ptx_device i32 @test_smid() {
	▲ Show 20 Lines • Show All 105 Lines • ▼ Show 20 Lines
	declare i32 @llvm.ptx.read.tid.y()			declare i32 @llvm.ptx.read.tid.y()
	declare i32 @llvm.ptx.read.tid.z()			declare i32 @llvm.ptx.read.tid.z()
	declare i32 @llvm.ptx.read.tid.w()			declare i32 @llvm.ptx.read.tid.w()
	declare i32 @llvm.ptx.read.ntid.x()			declare i32 @llvm.ptx.read.ntid.x()
	declare i32 @llvm.ptx.read.ntid.y()			declare i32 @llvm.ptx.read.ntid.y()
	declare i32 @llvm.ptx.read.ntid.z()			declare i32 @llvm.ptx.read.ntid.z()
	declare i32 @llvm.ptx.read.ntid.w()			declare i32 @llvm.ptx.read.ntid.w()

				declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
	declare i32 @llvm.ptx.read.laneid()			declare i32 @llvm.ptx.read.laneid()
	declare i32 @llvm.ptx.read.warpid()			declare i32 @llvm.ptx.read.warpid()
	declare i32 @llvm.ptx.read.nwarpid()			declare i32 @llvm.ptx.read.nwarpid()

	declare i32 @llvm.ptx.read.ctaid.x()			declare i32 @llvm.ptx.read.ctaid.x()
	declare i32 @llvm.ptx.read.ctaid.y()			declare i32 @llvm.ptx.read.ctaid.y()
	declare i32 @llvm.ptx.read.ctaid.z()			declare i32 @llvm.ptx.read.ctaid.z()
	declare i32 @llvm.ptx.read.ctaid.w()			declare i32 @llvm.ptx.read.ctaid.w()
	Show All 16 Lines
	declare i64 @llvm.ptx.read.clock64()			declare i64 @llvm.ptx.read.clock64()

	declare i32 @llvm.ptx.read.pm0()			declare i32 @llvm.ptx.read.pm0()
	declare i32 @llvm.ptx.read.pm1()			declare i32 @llvm.ptx.read.pm1()
	declare i32 @llvm.ptx.read.pm2()			declare i32 @llvm.ptx.read.pm2()
	declare i32 @llvm.ptx.read.pm3()			declare i32 @llvm.ptx.read.pm3()

	declare void @llvm.ptx.bar.sync(i32 %i)			declare void @llvm.ptx.bar.sync(i32 %i)

				; RANGE-DAG: ![[BLK_IDX_XY]] = !{i32 0, i32 1024}
				; RANGE-DAG: ![[BLK_IDX_Z]] = !{i32 0, i32 64}
				; RANGE-DAG: ![[BLK_SIZE_XY]] = !{i32 1, i32 1025}
				; RANGE-DAG: ![[BLK_SIZE_Z]] = !{i32 1, i32 65}
				; RANGE-DAG: ![[LANEID]] = !{i32 0, i32 32}
				; RANGE-DAG: ![[WARPSIZE]] = !{i32 32, i32 33}
				; RANGE_30-DAG: ![[GRID_IDX_X]] = !{i32 0, i32 2147483647}
				; RANGE-DAG: ![[GRID_IDX_YZ]] = !{i32 0, i32 65535}
				; RANGE_30-DAG: ![[GRID_SIZE_X]] = !{i32 1, i32 -2147483648}
				; RANGE-DAG: ![[GRID_SIZE_YZ]] = !{i32 1, i32 65536}
				jlebarUnsubmitted Not Done Reply Inline Actions It looks like we're covering everything other than warpSize -- can we add that one too, for completeness? jlebar: It looks like we're covering everything other than warpSize -- can we add that one too, for…
				traAuthorUnsubmitted Not Done Reply Inline Actions ptx.read.* and nvvm.read.ptx.sreg.* appear to have diverged. There's no llvm.ptx.read.warpsize, so it's not in this file. We apparently don't test nvvm.read.ptx.sreg.* intrinsics much, either. I can add a range test for nvvm.read.ptx.sreg.warpsize to this patch, but it looks like there's enough work for a separate patch. tra: ptx.read.* and nvvm.read.ptx.sreg.* appear to have diverged. There's no llvm.ptx.read.warpsize…

				jingyueUnsubmitted Not Done Reply Inline Actions Trailing blank line jingyue: Trailing blank line

This is an archive of the discontinued LLVM Phabricator instance.

[NVPTX] Added NVVMIntrRange pass
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 58522

lib/Target/NVPTX/CMakeLists.txt

lib/Target/NVPTX/NVPTX.h

lib/Target/NVPTX/NVPTXTargetMachine.cpp

lib/Target/NVPTX/NVVMIntrRange.cpp

test/CodeGen/NVPTX/intrinsic-old.ll

This is an archive of the discontinued LLVM Phabricator instance.

[NVPTX] Added NVVMIntrRange pass ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 58522

lib/Target/NVPTX/CMakeLists.txt

lib/Target/NVPTX/NVPTX.h

lib/Target/NVPTX/NVPTXTargetMachine.cpp

lib/Target/NVPTX/NVVMIntrRange.cpp

test/CodeGen/NVPTX/intrinsic-old.ll

[NVPTX] Added NVVMIntrRange pass
ClosedPublic