Diff 108959

include/llvm/IR/IntrinsicsAMDGPU.td

	Show First 20 Lines • Show All 750 Lines • ▼ Show 20 Lines
	// with the guarantee that the source value is computed as if the entire			// with the guarantee that the source value is computed as if the entire
	// program were executed in Whole Wavefront Mode, i.e. with all channels			// program were executed in Whole Wavefront Mode, i.e. with all channels
	// enabled, with a few exceptions: - Phi nodes which require WWM return an			// enabled, with a few exceptions: - Phi nodes which require WWM return an
	// undefined value.			// undefined value.
	def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],			def int_amdgcn_wwm : Intrinsic<[llvm_any_ty],
	[LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]			[LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
	>;			>;

				// Given a value, copies it while setting all the inactive lanes to a given
				// value. Note that OpenGL helper lanes are considered active, so if the
				// program ever uses WQM, then the source will be computed in WQM.
				def int_amdgcn_set_inactive :
				Intrinsic<[llvm_anyint_ty],
				[LLVMMatchType<0>, // value to be copied
				LLVMMatchType<0>], // value for the inactive lanes to take
				[IntrNoMem, IntrConvergent]>;

	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	// CI+ Intrinsics			// CI+ Intrinsics
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	def int_amdgcn_s_dcache_inv_vol :			def int_amdgcn_s_dcache_inv_vol :
	GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,			GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">,
	Intrinsic<[], [], []>;			Intrinsic<[], [], []>;

	▲ Show 20 Lines • Show All 77 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIInstrInfo.cpp

Show First 20 Lines • Show All 1,082 Lines • ▼ Show 20 Lines	if (SrcOp.isImm()) {
.addReg(Dst, RegState::Implicit \| RegState::Define);		.addReg(Dst, RegState::Implicit \| RegState::Define);
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)		BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi)
.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))		.addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1))
.addReg(Dst, RegState::Implicit \| RegState::Define);		.addReg(Dst, RegState::Implicit \| RegState::Define);
}		}
MI.eraseFromParent();		MI.eraseFromParent();
break;		break;
}		}
		case AMDGPU::V_SET_INACTIVE_B32: {
		BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
		.addReg(AMDGPU::EXEC);
		BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), MI.getOperand(0).getReg())
		.add(MI.getOperand(2));
		BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
		.addReg(AMDGPU::EXEC);
		MI.eraseFromParent();
		break;
		}
		case AMDGPU::V_SET_INACTIVE_B64: {
		BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
		.addReg(AMDGPU::EXEC);
		MachineInstr *Copy = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_PSEUDO),
		MI.getOperand(0).getReg())
		.add(MI.getOperand(2));
		expandPostRAPseudo(*Copy);
		BuildMI(MBB, MI, DL, get(AMDGPU::S_NOT_B64), AMDGPU::EXEC)
		.addReg(AMDGPU::EXEC);
		MI.eraseFromParent();
		break;
		}
case AMDGPU::V_MOVRELD_B32_V1:		case AMDGPU::V_MOVRELD_B32_V1:
case AMDGPU::V_MOVRELD_B32_V2:		case AMDGPU::V_MOVRELD_B32_V2:
case AMDGPU::V_MOVRELD_B32_V4:		case AMDGPU::V_MOVRELD_B32_V4:
case AMDGPU::V_MOVRELD_B32_V8:		case AMDGPU::V_MOVRELD_B32_V8:
case AMDGPU::V_MOVRELD_B32_V16: {		case AMDGPU::V_MOVRELD_B32_V16: {
const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);		const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
unsigned VecReg = MI.getOperand(0).getReg();		unsigned VecReg = MI.getOperand(0).getReg();
bool IsUndef = MI.getOperand(1).isUndef();		bool IsUndef = MI.getOperand(1).isUndef();
▲ Show 20 Lines • Show All 3,269 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIInstructions.td

	Show First 20 Lines • Show All 130 Lines • ▼ Show 20 Lines
	} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]			} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]

	def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$exec), (ins SReg_64:$src0)> {			def EXIT_WWM : SPseudoInstSI <(outs SReg_64:$exec), (ins SReg_64:$src0)> {
	let hasSideEffects = 0;			let hasSideEffects = 0;
	let mayLoad = 0;			let mayLoad = 0;
	let mayStore = 0;			let mayStore = 0;
	}			}

				// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
				// restoring it after we're done.
				def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
				(ins VGPR_32: $src, VSrc_b32:$inactive),
				[(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
				let Constraints = "$src = $vdst";
				}

				def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst),
				(ins VReg_64: $src, VSrc_b64:$inactive),
				[(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> {
				let Constraints = "$src = $vdst";
				}

	let usesCustomInserter = 1, SALU = 1 in {			let usesCustomInserter = 1, SALU = 1 in {
	def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),			def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
	[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;			[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
	} // End let usesCustomInserter = 1, SALU = 1			} // End let usesCustomInserter = 1, SALU = 1

	def S_MOV_B64_term : PseudoInstSI<(outs SReg_64:$dst),			def S_MOV_B64_term : PseudoInstSI<(outs SReg_64:$dst),
	(ins SSrc_b64:$src0)> {			(ins SSrc_b64:$src0)> {
	let SALU = 1;			let SALU = 1;
	▲ Show 20 Lines • Show All 1,180 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIWholeQuadMode.cpp

Show First 20 Lines • Show All 297 Lines • ▼ Show 20 Lines
}		}

// Scan instructions to determine which ones require an Exact execmask and		// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.		// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,		char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
std::vector<WorkItem> &Worklist) {		std::vector<WorkItem> &Worklist) {
char GlobalFlags = 0;		char GlobalFlags = 0;
bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");		bool WQMOutputs = MF.getFunction()->hasFnAttribute("amdgpu-ps-wqm-outputs");
		SmallVector<MachineInstr *, 4> SetInactiveInstrs;

// We need to visit the basic blocks in reverse post-order so that we visit		// We need to visit the basic blocks in reverse post-order so that we visit
// defs before uses, in particular so that we don't accidentally mark an		// defs before uses, in particular so that we don't accidentally mark an
// instruction as needing e.g. WQM before visiting it and realizing it needs		// instruction as needing e.g. WQM before visiting it and realizing it needs
// WQM disabled.		// WQM disabled.
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);		ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {		for (auto BI = RPOT.begin(), BE = RPOT.end(); BI != BE; ++BI) {
MachineBasicBlock &MBB = **BI;		MachineBasicBlock &MBB = **BI;
Show All 22 Lines	for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
} else if (Opcode == AMDGPU::WWM) {		} else if (Opcode == AMDGPU::WWM) {
// The WWM intrinsic doesn't make the same guarantee, and plus it needs		// The WWM intrinsic doesn't make the same guarantee, and plus it needs
// to be executed in WQM or Exact so that its copy doesn't clobber		// to be executed in WQM or Exact so that its copy doesn't clobber
// inactive lanes.		// inactive lanes.
markInstructionUses(MI, StateWWM, Worklist);		markInstructionUses(MI, StateWWM, Worklist);
GlobalFlags \|= StateWWM;		GlobalFlags \|= StateWWM;
LowerToCopyInstrs.push_back(&MI);		LowerToCopyInstrs.push_back(&MI);
continue;		continue;
		} else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 \|\|
		Opcode == AMDGPU::V_SET_INACTIVE_B64) {
		III.Disabled = StateWWM;
		MachineOperand &Inactive = MI.getOperand(2);
		if (Inactive.isReg()) {
		if (Inactive.isUndef()) {
		LowerToCopyInstrs.push_back(&MI);
		} else {
		unsigned Reg = Inactive.getReg();
		if (TargetRegisterInfo::isVirtualRegister(Reg)) {
		for (MachineInstr &DefMI : MRI->def_instructions(Reg))
		markInstruction(DefMI, StateWWM, Worklist);
		}
		}
		}
		SetInactiveInstrs.push_back(&MI);
		continue;
} else if (TII->isDisableWQM(MI)) {		} else if (TII->isDisableWQM(MI)) {
BBI.Needs \|= StateExact;		BBI.Needs \|= StateExact;
if (!(BBI.InNeeds & StateExact)) {		if (!(BBI.InNeeds & StateExact)) {
BBI.InNeeds \|= StateExact;		BBI.InNeeds \|= StateExact;
Worklist.push_back(&MBB);		Worklist.push_back(&MBB);
}		}
GlobalFlags \|= StateExact;		GlobalFlags \|= StateExact;
III.Disabled = StateWQM \| StateWWM;		III.Disabled = StateWQM \| StateWWM;
Show All 23 Lines	for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
continue;		continue;
}		}

markInstruction(MI, Flags, Worklist);		markInstruction(MI, Flags, Worklist);
GlobalFlags \|= Flags;		GlobalFlags \|= Flags;
}		}
}		}

		if (GlobalFlags & StateWQM) {
		for (MachineInstr *MI : SetInactiveInstrs)
		markInstruction(*MI, StateWQM, Worklist);
		}
		nhaehnle (Unsubmitted, Not Done): Hmm, so automatic propagation of the WQM bit doesn't cover this? It would be nicer if it did, but I don't think it's a big deal in practice. Could you please add an explanatory comment in the code?
		cwabbott (Author; Unsubmitted, Not Done): No, it doesn't, since this is doing something different. It's implementing the semantics we talked about: if anything in the program needs WQM, then the instruction should be in WQM and the source should be in WQM, to make sure that helper lanes participate in reductions. I don't think that can be handled by any kind of propagation. It's also described in the definition of llvm.amdgcn.set.inactive and tested by test_set_inactive2. I can add a comment here to explain that, though.
		nhaehnle (Unsubmitted, Not Done): Ok, thanks.

return GlobalFlags;		return GlobalFlags;
}		}

void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,		void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
std::vector<WorkItem>& Worklist) {		std::vector<WorkItem>& Worklist) {
MachineBasicBlock *MBB = MI.getParent();		MachineBasicBlock *MBB = MI.getParent();
InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references		InstrInfo II = Instructions[&MI]; // take a copy to prevent dangling references
BlockInfo &BI = Blocks[MBB];		BlockInfo &BI = Blocks[MBB];
▲ Show 20 Lines • Show All 405 Lines • ▼ Show 20 Lines	MachineInstr *Copy =
.addReg(LiveMaskReg);		.addReg(LiveMaskReg);

LIS->ReplaceMachineInstrInMaps(MI, Copy);		LIS->ReplaceMachineInstrInMaps(MI, Copy);
MI->eraseFromParent();		MI->eraseFromParent();
}		}
}		}

void SIWholeQuadMode::lowerCopyInstrs() {		void SIWholeQuadMode::lowerCopyInstrs() {
for (MachineInstr *MI : LowerToCopyInstrs)		for (MachineInstr *MI : LowerToCopyInstrs) {
		for (unsigned i = MI->getNumExplicitOperands() - 1; i > 1; i--)
		MI->RemoveOperand(i);
MI->setDesc(TII->get(AMDGPU::COPY));		MI->setDesc(TII->get(AMDGPU::COPY));
}		}
		}

bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {		bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
Instructions.clear();		Instructions.clear();
Blocks.clear();		Blocks.clear();
LiveMaskQueries.clear();		LiveMaskQueries.clear();
LowerToCopyInstrs.clear();		LowerToCopyInstrs.clear();
CallingConv = MF.getFunction()->getCallingConv();		CallingConv = MF.getFunction()->getCallingConv();

▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines

test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll

This file was added.

				; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=GCN %s
				; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=GCN %s


				; GCN-LABEL: {{^}}set_inactive:
				; GCN: s_not_b64 exec, exec
				; GCN: v_mov_b32_e32 {{v[0-9]+}}, 42
				; GCN: s_not_b64 exec, exec
				define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) {
				%tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
				store i32 %tmp, i32 addrspace(1)* %out
				ret void
				}

				; GCN-LABEL: {{^}}set_inactive_64:
				; GCN: s_not_b64 exec, exec
				; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0
				; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0
				; GCN: s_not_b64 exec, exec
				define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) {
				%tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0
				store i64 %tmp, i64 addrspace(1)* %out
				ret void
				}

				declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
				declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0

				attributes #0 = { convergent readnone }

test/CodeGen/AMDGPU/wqm.ll

Show First 20 Lines • Show All 231 Lines • ▼ Show 20 Lines	if:
%out.0 = call float @llvm.amdgcn.wwm.f32(float %out)		%out.0 = call float @llvm.amdgcn.wwm.f32(float %out)
br label %endif		br label %endif

endif:		endif:
%out.1 = phi float [ %out.0, %if ], [ 0.0, %main_body ]		%out.1 = phi float [ %out.0, %if ], [ 0.0, %main_body ]
ret float %out.1		ret float %out.1
}		}

		; Check that @llvm.amdgcn.set.inactive disables WWM.
		;
		;CHECK-LABEL: {{^}}test_set_inactive1:
		;CHECK: buffer_load_dword
		;CHECK: s_not_b64 exec, exec
		;CHECK: v_mov_b32_e32
		;CHECK: s_not_b64 exec, exec
		;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
		;CHECK: v_add_i32_e32
		define amdgpu_ps void @test_set_inactive1(i32 inreg %idx) {
		main_body:
		%src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
		%src.0 = bitcast float %src to i32
		%src.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src.0, i32 0)
		%out = add i32 %src.1, %src.1
		%out.0 = call i32 @llvm.amdgcn.wwm.i32(i32 %out)
		%out.1 = bitcast i32 %out.0 to float
		call void @llvm.amdgcn.buffer.store.f32(float %out.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
		ret void
		}

		; Check that enabling WQM anywhere enables WQM for the set.inactive source.
		;
		;CHECK-LABEL: {{^}}test_set_inactive2:
		;CHECK: s_wqm_b64 exec, exec
		;CHECK: buffer_load_dword
		;CHECK: buffer_load_dword
		define amdgpu_ps void @test_set_inactive2(i32 inreg %idx0, i32 inreg %idx1) {
		main_body:
		%src1 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
		%src1.0 = bitcast float %src1 to i32
		%src1.1 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %src1.0, i32 undef)
		%src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0)
		%src0.0 = bitcast float %src0 to i32
		%src0.1 = call i32 @llvm.amdgcn.wqm.i32(i32 %src0.0)
		%out = add i32 %src0.1, %src1.1
		%out.0 = bitcast i32 %out to float
		call void @llvm.amdgcn.buffer.store.f32(float %out.0, <4 x i32> undef, i32 %idx1, i32 0, i1 0, i1 0)
		ret void
		}

; Check a case of one branch of an if-else requiring WQM, the other requiring		; Check a case of one branch of an if-else requiring WQM, the other requiring
; exact.		; exact.
;		;
; Note: In this particular case, the save-and-restore could be avoided if the		; Note: In this particular case, the save-and-restore could be avoided if the
; analysis understood that the two branches of the if-else are mutually		; analysis understood that the two branches of the if-else are mutually
; exclusive.		; exclusive.
;		;
;CHECK-LABEL: {{^}}test_control_flow_0:		;CHECK-LABEL: {{^}}test_control_flow_0:
▲ Show 20 Lines • Show All 241 Lines • ▼ Show 20 Lines

; Check prolog shaders.		; Check prolog shaders.
;		;
; CHECK-LABEL: {{^}}test_prolog_1:		; CHECK-LABEL: {{^}}test_prolog_1:
; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec		; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: s_wqm_b64 exec, exec		; CHECK: s_wqm_b64 exec, exec
; CHECK: v_add_f32_e32 v0,		; CHECK: v_add_f32_e32 v0,
; CHECK: s_and_b64 exec, exec, [[ORIG]]		; CHECK: s_and_b64 exec, exec, [[ORIG]]
define amdgpu_ps float @test_prolog_1(float %a, float %b) #4 {		define amdgpu_ps float @test_prolog_1(float %a, float %b) #5 {
main_body:		main_body:
%s = fadd float %a, %b		%s = fadd float %a, %b
ret float %s		ret float %s
}		}

; CHECK-LABEL: {{^}}test_loop_vcc:		; CHECK-LABEL: {{^}}test_loop_vcc:
; CHECK-NEXT: ; %entry		; CHECK-NEXT: ; %entry
; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec		; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
▲ Show 20 Lines • Show All 149 Lines • ▼ Show 20 Lines
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3		declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3		declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3		declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3		declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
declare void @llvm.AMDGPU.kill(float) #1		declare void @llvm.AMDGPU.kill(float) #1
declare float @llvm.amdgcn.wqm.f32(float) #3		declare float @llvm.amdgcn.wqm.f32(float) #3
declare i32 @llvm.amdgcn.wqm.i32(i32) #3		declare i32 @llvm.amdgcn.wqm.i32(i32) #3
declare float @llvm.amdgcn.wwm.f32(float) #3		declare float @llvm.amdgcn.wwm.f32(float) #3
		declare i32 @llvm.amdgcn.wwm.i32(i32) #3
		declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #4
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #3		declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #3
declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #3		declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #3

attributes #1 = { nounwind }		attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }		attributes #2 = { nounwind readonly }
attributes #3 = { nounwind readnone }		attributes #3 = { nounwind readnone }
attributes #4 = { "amdgpu-ps-wqm-outputs" }		attributes #4 = { nounwind readnone convergent }
		attributes #5 = { "amdgpu-ps-wqm-outputs" }

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Implement llvm.amdgcn.set.inactive intrinsic
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 108959

include/llvm/IR/IntrinsicsAMDGPU.td

lib/Target/AMDGPU/SIInstrInfo.cpp

lib/Target/AMDGPU/SIInstructions.td

lib/Target/AMDGPU/SIWholeQuadMode.cpp

test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll

test/CodeGen/AMDGPU/wqm.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Implement llvm.amdgcn.set.inactive intrinsic (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 108959

include/llvm/IR/IntrinsicsAMDGPU.td

lib/Target/AMDGPU/SIInstrInfo.cpp

lib/Target/AMDGPU/SIInstructions.td

lib/Target/AMDGPU/SIWholeQuadMode.cpp

test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll

test/CodeGen/AMDGPU/wqm.ll

[AMDGPU] Implement llvm.amdgcn.set.inactive intrinsic
ClosedPublic