Diff 109790

llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td

	Show First 20 Lines • Show All 750 Lines • ▼ Show 20 Lines
	// with the guarantee that the source value is computed as if the entire			// with the guarantee that the source value is computed as if the entire
	// program were executed in Whole Wavefront Mode, i.e. with all channels			// program were executed in Whole Wavefront Mode, i.e. with all channels
	// enabled, with a few exceptions: - Phi nodes which require WWM return an			// enabled, with a few exceptions: - Phi nodes which require WWM return an
	// undefined value.			// undefined value.
	def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],			def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
	[LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]			[LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
	>;			>;

				// Given a value, copies it while setting all the inactive lanes to a given
				// value. Note that OpenGL helper lanes are considered active, so if the
				// program ever uses WQM, then the instruction and the first source will be
				// computed in WQM.
				def int_amdgcn_set_inactive :
				Intrinsic<[llvm_anyint_ty],
				[LLVMMatchType<0>, // value to be copied
				LLVMMatchType<0>], // value for the inactive lanes to take
				[IntrNoMem, IntrConvergent]>;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// CI+ Intrinsics			// CI+ Intrinsics
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def int_amdgcn_s_dcache_inv_vol :			def int_amdgcn_s_dcache_inv_vol :
	GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,			GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
	Intrinsic<[], [], []>;			Intrinsic<[], [], []>;

	▲ Show 20 Lines • Show All 77 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp

Show First 20 Lines • Show All 1,093 Lines • ▼ Show 20 Lines	if (SrcOp.isImm()) {
.addReg(Dst, RegState::Implicit \| RegState::Define);		.addReg(Dst, RegState::Implicit \| RegState::Define);
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)		BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))		.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
.addReg(Dst, RegState::Implicit \| RegState::Define);		.addReg(Dst, RegState::Implicit \| RegState::Define);
}		}
MI.eraseFromParent();		MI.eraseFromParent();
break;		break;
}		}
		case AMDGPU::V_SET_INACTIVE_B32: {
		BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
		.addReg(AMDGPU::EXEC);
		BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
		.add(MI.getOperand(2));
		BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
		.addReg(AMDGPU::EXEC);
		MI.eraseFromParent();
		break;
		}
		case AMDGPU::V_SET_INACTIVE_B64: {
		BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
		.addReg(AMDGPU::EXEC);
		MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
		MI.getOperand(0).getReg())
		.add(MI.getOperand(2));
		expandPostRAPseudo(*Copy);
		BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
		.addReg(AMDGPU::EXEC);
		MI.eraseFromParent();
		break;
		}
case AMDGPU::V_MOVRELD_B32_V1:		case AMDGPU::V_MOVRELD_B32_V1:
case AMDGPU::V_MOVRELD_B32_V2:		case AMDGPU::V_MOVRELD_B32_V2:
case AMDGPU::V_MOVRELD_B32_V4:		case AMDGPU::V_MOVRELD_B32_V4:
case AMDGPU::V_MOVRELD_B32_V8:		case AMDGPU::V_MOVRELD_B32_V8:
case AMDGPU::V_MOVRELD_B32_V16: {		case AMDGPU::V_MOVRELD_B32_V16: {
const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);		const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
unsigned VecReg = MI.getOperand(0).getReg();		unsigned VecReg = MI.getOperand(0).getReg();
bool IsUndef = MI.getOperand(1).isUndef();		bool IsUndef = MI.getOperand(1).isUndef();
▲ Show 20 Lines • Show All 3,271 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/SIInstructions.td

	Show First 20 Lines • Show All 131 Lines • ▼ Show 20 Lines
	} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]			} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]

	def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> {			def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$sdst), (ins SReg_64:$src0)> {
	let hasSideEffects = 0;			let hasSideEffects = 0;
	let mayLoad = 0;			let mayLoad = 0;
	let mayStore = 0;			let mayStore = 0;
	}			}

				// Invert the exec mask, overwrite the inactive lanes of $vdst with $inactive,
				// and then invert the exec mask again to restore it.
				def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
				(ins VGPR_32: $src, VSrc_b32:$inactive),
				[(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
				let Constraints = "$src = $vdst";
				}

				def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst),
				(ins VReg_64: $src, VSrc_b64:$inactive),
				[(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> {
				let Constraints = "$src = $vdst";
				}

	let usesCustomInserter = 1, SALU = 1 in {			let usesCustomInserter = 1, SALU = 1 in {
	def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),			def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
	[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;			[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
	} // End let usesCustomInserter = 1, SALU = 1			} // End let usesCustomInserter = 1, SALU = 1

	def S_MOV_B64_term : PseudoInstSI<(outs SReg_64:$dst),			def S_MOV_B64_term : PseudoInstSI<(outs SReg_64:$dst),
	(ins SSrc_b64:$src0)> {			(ins SSrc_b64:$src0)> {
	let SALU = 1;			let SALU = 1;
	▲ Show 20 Lines • Show All 1,286 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp

Show First 20 Lines • Show All 297 Lines • ▼ Show 20 Lines
}		}

// Scan instructions to determine which ones require an Exact execmask and		// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.		// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,		char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
std::vector<WorkItem> &Worklist) {		std::vector<WorkItem> &Worklist) {
char GlobalFlags = 0;		char GlobalFlags = 0;
bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");		bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");
		SmallVector<MachineInstr *, 4> SetInactiveInstrs;

// We need to visit the basic blocks in reverse post-order so that we visit		// We need to visit the basic blocks in reverse post-order so that we visit
// defs before uses, in particular so that we don't accidentally mark an		// defs before uses, in particular so that we don't accidentally mark an
// instruction as needing e.g. WQM before visiting it and realizing it needs		// instruction as needing e.g. WQM before visiting it and realizing it needs
// WQM disabled.		// WQM disabled.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);		ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {		for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {
MachineBasicBlock &MBB = **BI;		MachineBasicBlock &MBB = **BI;
Show All 22 Lines	for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
} else if (Opcode == AMDGPU::WWM) {		} else if (Opcode == AMDGPU::WWM) {
// The WWM intrinsic doesn't make the same guarantee, and plus it needs		// The WWM intrinsic doesn't make the same guarantee, and plus it needs
// to be executed in WQM or Exact so that its copy doesn't clobber		// to be executed in WQM or Exact so that its copy doesn't clobber
// inactive lanes.		// inactive lanes.
markInstructionUses(MI, StateWWM, Worklist);		markInstructionUses(MI, StateWWM, Worklist);
GlobalFlags \|= StateWWM;		GlobalFlags \|= StateWWM;
LowerToCopyInstrs.push_back(&MI);		LowerToCopyInstrs.push_back(&MI);
continue;		continue;
		} else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 \|\|
		Opcode == AMDGPU::V_SET_INACTIVE_B64) {
		III.Disabled = StateWWM;
		MachineOperand &Inactive = MI.getOperand(2);
		if (Inactive.isReg()) {
		if (Inactive.isUndef()) {
		LowerToCopyInstrs.push_back(&MI);
		} else {
		unsigned Reg = Inactive.getReg();
		if (TargetRegisterInfo::isVirtualRegister(Reg)) {
		for (MachineInstr &DefMI : MRI->def_instructions(Reg))
		markInstruction(DefMI, StateWWM, Worklist);
		}
		}
		}
		SetInactiveInstrs.push_back(&MI);
		continue;
} else if (TII->isDisableWQM(MI)) {		} else if (TII->isDisableWQM(MI)) {
BBI.Needs \|= StateExact;		BBI.Needs \|= StateExact;
if (!(BBI.InNeeds & StateExact)) {		if (!(BBI.InNeeds & StateExact)) {
BBI.InNeeds \|= StateExact;		BBI.InNeeds \|= StateExact;
Worklist.push_back(&MBB);		Worklist.push_back(&MBB);
}		}
GlobalFlags \|= StateExact;		GlobalFlags \|= StateExact;
III.Disabled = StateWQM \| StateWWM;		III.Disabled = StateWQM \| StateWWM;
Show All 23 Lines	for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
continue;		continue;
}		}

markInstruction(MI, Flags, Worklist);		markInstruction(MI, Flags, Worklist);
GlobalFlags \|= Flags;		GlobalFlags \|= Flags;
}		}
}		}

		// Make sure that any SET_INACTIVE instructions are computed in WQM if WQM is
		// ever used anywhere in the function. This implements the corresponding
		// semantics of @llvm.amdgcn.set.inactive.
		if (GlobalFlags & StateWQM) {
		for (MachineInstr *MI : SetInactiveInstrs)
		markInstruction(*MI, StateWQM, Worklist);
		}

return GlobalFlags;		return GlobalFlags;
}		}

void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,		void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
std::vector<WorkItem>& Worklist) {		std::vector<WorkItem>& Worklist) {
MachineBasicBlock *MBB = MI.getParent();		MachineBasicBlock *MBB = MI.getParent();
InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references		InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
BlockInfo &BI = Blocks[MBB];		BlockInfo &BI = Blocks[MBB];
▲ Show 20 Lines • Show All 403 Lines • ▼ Show 20 Lines	MachineInstr *Copy =
.addReg(LiveMaskReg);		.addReg(LiveMaskReg);

LIS->ReplaceMachineInstrInMaps(MI, Copy);		LIS->ReplaceMachineInstrInMaps(MI, Copy);
MI->eraseFromParent();		MI->eraseFromParent();
}		}
}		}

void SIWholeQuadMode::lowerCopyInstrs() {		void SIWholeQuadMode::lowerCopyInstrs() {
for (MachineInstr *MI : LowerToCopyInstrs)		for (MachineInstr *MI : LowerToCopyInstrs) {
		for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
		MI->RemoveOperand(i);
MI->setDesc(TII->get(AMDGPU::COPY));		MI->setDesc(TII->get(AMDGPU::COPY));
}		}
		}

bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {		bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
Instructions.clear();		Instructions.clear();
Blocks.clear();		Blocks.clear();
LiveMaskQueries.clear();		LiveMaskQueries.clear();
LowerToCopyInstrs.clear();		LowerToCopyInstrs.clear();
CallingConv = MF.getFunction()->getCallingConv();		CallingConv = MF.getFunction()->getCallingConv();

▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll

				; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=GCN %s
				; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=GCN %s


				; GCN-LABEL: {{^}}set_inactive:
				; GCN: s_not_b64 exec, exec
				; GCN: v_mov_b32_e32 {{v[0-9]+}}, 42
				; GCN: s_not_b64 exec, exec
				define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) {
				%tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
				store i32 %tmp, i32 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}set_inactive_64:
				; GCN: s_not_b64 exec, exec
				; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0
				; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0
				; GCN: s_not_b64 exec, exec
				define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) {
				%tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0
				store i64 %tmp, i64 addrspace(1)* %out
				ret void
				}

				declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
				declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0

				attributes #0 = { convergent readnone }

llvm/trunk/test/CodeGen/AMDGPU/wqm.ll

Show First 20 Lines • Show All 250 Lines • ▼ Show 20 Lines	if:
%out.0 = call float @llvm.amdgcn.wwm.f32(float %out)		%out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
br label %endif		br label %endif

endif:		endif:
%out.1 = phi float [ %out.0, %if ], [ 0.0, %main_body ]		%out.1 = phi float [ %out.0, %if ], [ 0.0, %main_body ]
ret float %out.1		ret float %out.1
}		}

		; Check that @llvm.amdgcn.set.inactive disables WWM.
		;
		;CHECK-LABEL: {{^}}test_set_inactive1:
		;CHECK: buffer_load_dword
		;CHECK: s_not_b64 exec, exec
		;CHECK: v_mov_b32_e32
		;CHECK: s_not_b64 exec, exec
		;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
		;CHECK: v_add_i32_e32
		define amdgpu_ps void @test_set_inactive1(i32 inreg %idx) {
		main_body:
		%src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
		%src.0 = bitcast float %src to i32
		%src.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src.0, i32 0)
		%out = add i32 %src.1, %src.1
		%out.0 = call i32 @llvm.amdgcn.wwm.i32(i32 %out)
		%out.1 = bitcast i32 %out.0 to float
		call void @llvm.amdgcn.buffer.store.f32(float %out.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
		ret void
		}

		; Check that enabling WQM anywhere enables WQM for the set.inactive source.
		;
		;CHECK-LABEL: {{^}}test_set_inactive2:
		;CHECK: s_wqm_b64 exec, exec
		;CHECK: buffer_load_dword
		;CHECK: buffer_load_dword
		define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) {
		main_body:
		%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
		%src1.0 = bitcast float %src1 to i32
		%src1.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src1.0, i32 undef)
		%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
		%src0.0 = bitcast float %src0 to i32
		%src0.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %src0.0)
		%out = add i32 %src0.1, %src1.1
		%out.0 = bitcast i32 %out to float
		call void @llvm.amdgcn.buffer.store.f32(float %out.0, <4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
		ret void
		}

; Check a case of one branch of an if-else requiring WQM, the other requiring		; Check a case of one branch of an if-else requiring WQM, the other requiring
; exact.		; exact.
;		;
; Note: In this particular case, the save-and-restore could be avoided if the		; Note: In this particular case, the save-and-restore could be avoided if the
; analysis understood that the two branches of the if-else are mutually		; analysis understood that the two branches of the if-else are mutually
; exclusive.		; exclusive.
;		;
;CHECK-LABEL: {{^}}test_control_flow_0:		;CHECK-LABEL: {{^}}test_control_flow_0:
▲ Show 20 Lines • Show All 241 Lines • ▼ Show 20 Lines

; Check prolog shaders.		; Check prolog shaders.
;		;
; CHECK-LABEL: {{^}}test_prolog_1:		; CHECK-LABEL: {{^}}test_prolog_1:
; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec		; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: s_wqm_b64 exec, exec		; CHECK: s_wqm_b64 exec, exec
; CHECK: v_add_f32_e32 v0,		; CHECK: v_add_f32_e32 v0,
; CHECK: s_and_b64 exec, exec, [[ORIG]]		; CHECK: s_and_b64 exec, exec, [[ORIG]]
define amdgpu_ps float @test_prolog_1(float %a, float %b) #4 {		define amdgpu_ps float @test_prolog_1(float %a, float %b) #5 {
main_body:		main_body:
%s = fadd float %a, %b		%s = fadd float %a, %b
ret float %s		ret float %s
}		}

; CHECK-LABEL: {{^}}test_loop_vcc:		; CHECK-LABEL: {{^}}test_loop_vcc:
; CHECK-NEXT: ; %entry		; CHECK-NEXT: ; %entry
; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec		; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
▲ Show 20 Lines • Show All 150 Lines • ▼ Show 20 Lines
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3		declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3		declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3		declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
declare void @llvm.AMDGPU.kill(float) #1		declare void @llvm.AMDGPU.kill(float) #1
declare float @llvm.amdgcn.wqm.f32(float) #3		declare float @llvm.amdgcn.wqm.f32(float) #3
declare i32 @llvm.amdgcn.wqm.i32(i32) #3		declare i32 @llvm.amdgcn.wqm.i32(i32) #3
declare float @llvm.amdgcn.wwm.f32(float) #3		declare float @llvm.amdgcn.wwm.f32(float) #3
declare i32 @llvm.amdgcn.wwm.i32(i32) #3		declare i32 @llvm.amdgcn.wwm.i32(i32) #3
		declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #4
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #3		declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #3
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #3		declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #3

attributes #1 = { nounwind }		attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }		attributes #2 = { nounwind readonly }
attributes #3 = { nounwind readnone }		attributes #3 = { nounwind readnone }
attributes #4 = { "amdgpu-ps-wqm-outputs" }		attributes #4 = { nounwind readnone convergent }
		attributes #5 = { "amdgpu-ps-wqm-outputs" }

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Implement llvm.amdgcn.set.inactive intrinsic
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 109790

llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td

llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp

llvm/trunk/lib/Target/AMDGPU/SIInstructions.td

llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll

llvm/trunk/test/CodeGen/AMDGPU/wqm.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Implement llvm.amdgcn.set.inactive intrinsic
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 109790

llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td

llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp

llvm/trunk/lib/Target/AMDGPU/SIInstructions.td

llvm/trunk/lib/Target/AMDGPU/SIWholeQuadMode.cpp

llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll

llvm/trunk/test/CodeGen/AMDGPU/wqm.ll

[AMDGPU] Implement llvm.amdgcn.set.inactive intrinsic
ClosedPublic