Diff 64855

include/llvm/CodeGen/LiveIntervalAnalysis.h

Show First 20 Lines • Show All 386 Lines • ▼ Show 20 Lines	public:
LiveRange *getCachedRegUnit(unsigned Unit) {		LiveRange *getCachedRegUnit(unsigned Unit) {
return RegUnitRanges[Unit];		return RegUnitRanges[Unit];
}		}

const LiveRange *getCachedRegUnit(unsigned Unit) const {		const LiveRange *getCachedRegUnit(unsigned Unit) const {
return RegUnitRanges[Unit];		return RegUnitRanges[Unit];
}		}

		/// removeRegUnit - Remove computed live range for Unit. Subsequent uses
		/// should rely on on-demand recomputation.
		void removeRegUnit(unsigned Unit) {
		delete RegUnitRanges[Unit];
		RegUnitRanges[Unit] = nullptr;
		}
		arsenmUnsubmitted Done Reply Inline Actions This should probably be a separate patch arsenm: This should probably be a separate patch

/// Remove value numbers and related live segments starting at position		/// Remove value numbers and related live segments starting at position
/// @p Pos that are part of any liverange of physical register @p Reg or one		/// @p Pos that are part of any liverange of physical register @p Reg or one
/// of its subregisters.		/// of its subregisters.
void removePhysRegDefAt(unsigned Reg, SlotIndex Pos);		void removePhysRegDefAt(unsigned Reg, SlotIndex Pos);

/// Remove value number and related live segments of @p LI and its subranges		/// Remove value number and related live segments of @p LI and its subranges
/// that start at position @p Pos.		/// that start at position @p Pos.
void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos);		void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos);
▲ Show 20 Lines • Show All 51 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIWholeQuadMode.cpp

Show First 20 Lines • Show All 130 Lines • ▼ Show 20 Lines	private:
void propagateValueSub(const WorkItem::Value &V, LaneBitmask LaneMask,		void propagateValueSub(const WorkItem::Value &V, LaneBitmask LaneMask,
const LiveRange &LR, std::vector<WorkItem> &Worklist);		const LiveRange &LR, std::vector<WorkItem> &Worklist);
void propagateValue(const WorkItem::Value &V,		void propagateValue(const WorkItem::Value &V,
std::vector<WorkItem> &Worklist);		std::vector<WorkItem> &Worklist);
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);		void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);		void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
char analyzeFunction(MachineFunction &MF);		char analyzeFunction(MachineFunction &MF);

		bool requiresCorrectState(const MachineInstr &MI) const;

		MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
		MachineBasicBlock::iterator Before);
		MachineBasicBlock::iterator prepareInsertion(MachineBasicBlock &MBB,
		MachineBasicBlock::iterator First,
		MachineBasicBlock::iterator Last,
		bool PreferLast, bool SaveSCC);
void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,		void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
unsigned SaveWQM, unsigned LiveMaskReg);		unsigned SaveWQM, unsigned LiveMaskReg);
void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,		void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
unsigned SavedWQM);		unsigned SavedWQM);
void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);		void processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg, bool isEntry);

void lowerLiveMaskQueries(unsigned LiveMaskReg);		void lowerLiveMaskQueries(unsigned LiveMaskReg);

▲ Show 20 Lines • Show All 332 Lines • ▼ Show 20 Lines	else if (WI.MBB)
propagateBlock(*WI.MBB, Worklist);		propagateBlock(*WI.MBB, Worklist);
else		else
propagateValue(WI.V, Worklist);		propagateValue(WI.V, Worklist);
}		}

return GlobalFlags;		return GlobalFlags;
}		}

		/// Whether \p MI really requires the exec state computed during analysis.
		///
		/// Scalar instructions must occasionally be marked WQM for correct propagation
		/// (e.g. thread masks leading up to branches), but when it comes to actual
		/// execution, they don't care about EXEC.
		bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
		// Skip instructions that are not affected by EXEC
		if (TII->isScalarUnit(MI))
		return false;

		// Generic instructions such as COPY will either disappear by register
		// coalescing or be lowered to SALU or VALU instructions.
		if (TargetInstrInfo::isGenericOpcode(MI.getOpcode())) {
		if (MI.getNumExplicitOperands() >= 1) {
		const MachineOperand &Op = MI.getOperand(0);
		if (Op.isReg()) {
		arsenmUnsubmitted Not Done Reply Inline Actions MI.isTransient() might be more accurate. INLINEASM for example will not be removed arsenm: MI.isTransient() might be more accurate. INLINEASM for example will not be removed
		if (TRI->isSGPRReg(*MRI, Op.getReg())) {
		// SGPR instructions are not affected by EXEC
		return false;
		}
		}
		}
		}

		return true;
		}


		MachineBasicBlock::iterator
		SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
		MachineBasicBlock::iterator Before) {
		unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

		MachineInstr *Save =
		BuildMI(MBB, Before , DebugLoc(), TII->get(AMDGPU::S_CSELECT_B32),
		SaveReg)
		.addImm(1)
		.addImm(0);
		MachineInstr *Restore =
		BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_CMP_GT_U32))
		.addReg(SaveReg)
		arsenmUnsubmitted Done Reply Inline Actions I think this should use -1 instead of 1, and use compare != 0 to be more canonical arsenm: I think this should use -1 instead of 1, and use compare != 0 to be more canonical
		arsenmUnsubmitted Done Reply Inline Actions Maybe you should just emit COPY and move this expansion to copyPhysReg for scc arsenm: Maybe you should just emit COPY and move this expansion to copyPhysReg for scc
		.addImm(0);

		LIS->InsertMachineInstrInMaps(*Save);
		LIS->InsertMachineInstrInMaps(*Restore);
		LIS->createAndComputeVirtRegInterval(SaveReg);

		return Restore;
		}

		MachineBasicBlock::iterator
		SIWholeQuadMode::prepareInsertion(MachineBasicBlock &MBB,
		MachineBasicBlock::iterator First,
		MachineBasicBlock::iterator Last,
		bool PreferLast, bool SaveSCC) {
		if (!SaveSCC)
		return PreferLast ? Last : First;

		LiveRange &LR = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));
		auto MBBE = MBB.end();
		SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
		: LIS->getMBBEndIdx(&MBB);
		SlotIndex LastIdx = Last != MBBE ? LIS->getInstructionIndex(*Last)
		: LIS->getMBBEndIdx(&MBB);
		SlotIndex Idx = PreferLast ? LastIdx : FirstIdx;
		const LiveRange::Segment *S;

		for (;;) {
		S = LR.getSegmentContaining(Idx);
		if (!S)
		break;

		if (PreferLast) {
		SlotIndex Next = S->start.getBaseIndex();
		if (Next < FirstIdx)
		break;
		Idx = Next;
		} else {
		SlotIndex Next = S->end.getNextIndex().getBaseIndex();
		if (Next > LastIdx)
		break;
		Idx = Next;
		}
		}

		MachineBasicBlock::iterator MBBI;

		if (MachineInstr *MI = LIS->getInstructionFromIndex(Idx))
		MBBI = MI;
		else {
		assert(Idx == LIS->getMBBEndIdx(&MBB));
		MBBI = MBB.end();
		}

		if (S)
		MBBI = saveSCC(MBB, MBBI);

		return MBBI;
		}

void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,		void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before,		MachineBasicBlock::iterator Before,
unsigned SaveWQM, unsigned LiveMaskReg) {		unsigned SaveWQM, unsigned LiveMaskReg) {
MachineInstr *MI;		MachineInstr *MI;

if (SaveWQM) {		if (SaveWQM) {
MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),		MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
		arsenmUnsubmitted Not Done Reply Inline Actions Should set DebugLoc arsenm: Should set DebugLoc
SaveWQM)		SaveWQM)
.addReg(LiveMaskReg);		.addReg(LiveMaskReg);
} else {		} else {
MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),		MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
AMDGPU::EXEC)		AMDGPU::EXEC)
		arsenmUnsubmitted Not Done Reply Inline Actions These can be combined with findRegisterUseOperandIdx arsenm: These can be combined with findRegisterUseOperandIdx
.addReg(AMDGPU::EXEC)		.addReg(AMDGPU::EXEC)
.addReg(LiveMaskReg);		.addReg(LiveMaskReg);
}		}

LIS->InsertMachineInstrInMaps(*MI);		LIS->InsertMachineInstrInMaps(*MI);
}		}

void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,		void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before,		MachineBasicBlock::iterator Before,
unsigned SavedWQM) {		unsigned SavedWQM) {
MachineInstr *MI;		MachineInstr *MI;

if (SavedWQM) {		if (SavedWQM) {
MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)		MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
		arsenmUnsubmitted Not Done Reply Inline Actions Should set DebugLoc arsenm: Should set DebugLoc
.addReg(SavedWQM);		.addReg(SavedWQM);
} else {		} else {
MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),		MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
AMDGPU::EXEC)		AMDGPU::EXEC)
.addReg(AMDGPU::EXEC);		.addReg(AMDGPU::EXEC);
}		}

LIS->InsertMachineInstrInMaps(*MI);		LIS->InsertMachineInstrInMaps(*MI);
Show All 9 Lines	void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,

if (!(BI.Needs.In & StateWQM))		if (!(BI.Needs.In & StateWQM))
return;		return;

// This is a non-entry block that is WQM throughout, so no need to do		// This is a non-entry block that is WQM throughout, so no need to do
// anything.		// anything.
if (!isEntry && !(BI.Needs.Self & StateExact) && BI.Needs.Out != StateExact)		if (!isEntry && !(BI.Needs.Self & StateExact) && BI.Needs.Out != StateExact)
return;		return;

		arsenm (Unsubmitted, Not Done): This function is rather complicated because it special-cases the tracking of SCC liveness. Can you use LivePhysRegs or check the LiveIntervals to simplify it? (I think there was some issue with tracking physical registers with LIS the last time I tried to do it, but I don't remember the details.)
DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");		DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");

		// First pass: Scan forward to find the ranges in which switching
		// instructions are required.
unsigned SavedWQMReg = 0;		unsigned SavedWQMReg = 0;
bool WQMFromExec = isEntry;		bool WQMFromExec = isEntry;
char State = isEntry ? StateExact : StateWQM;		char State = isEntry ? StateExact : StateWQM;
MachineInstr *FirstNonWQM = nullptr;

auto II = MBB.getFirstNonPHI(), IE = MBB.end();		auto II = MBB.getFirstNonPHI(), IE = MBB.getFirstTerminator();
while (II != IE) {		if (isEntry)
		arsenmUnsubmitted Done Reply Inline Actions Capitalize arsenm: Capitalize
MachineInstr &MI = *II;		++II; // Skip the instruction that saves LiveMask
++II;

// Skip instructions that are not affected by EXEC		MachineBasicBlock::iterator First = IE;
if (TII->isScalarUnit(MI) && !MI.isTerminator())		for (;;) {
continue;		char Needs = 0;
		char OutNeeds = 0;

// Generic instructions such as COPY will either disappear by register		if (First == IE)
		arsenmUnsubmitted Not Done Reply Inline Actions The Clean = true can be hoisted to the initializer arsenm: The Clean = true can be hoisted to the initializer
// coalescing or be lowered to SALU or VALU instructions.		First = II;
if (TargetInstrInfo::isGenericOpcode(MI.getOpcode())) {
if (MI.getNumExplicitOperands() >= 1) {		if (II != IE) {
const MachineOperand &Op = MI.getOperand(0);		MachineInstr &MI = *II;
		arsenmUnsubmitted Not Done Reply Inline Actions range loop arsenm: range loop
if (Op.isReg()) {
		arsenmUnsubmitted Not Done Reply Inline Actions Maybe this should get the read operand and see if it is undef? arsenm: Maybe this should get the read operand and see if it is undef?
if (TRI->isSGPRReg(*MRI, Op.getReg())) {		if (requiresCorrectState(MI)) {
// SGPR instructions are not affected by EXEC		auto III = Instructions.find(&MI);
continue;		if (III != Instructions.end()) {
}		Needs = III->second.Needs;
		OutNeeds = III->second.OutNeeds;
		arsenmUnsubmitted Not Done Reply Inline Actions Should check if it's dead? arsenm: Should check if it's dead?
}		}
}		}
		} else {
		// End of basic block
		if ((BI.Needs.Out & StateWQM) && State != StateWQM)
		Needs = StateWQM;
		else if (BI.Needs.Out == StateExact && State != StateExact)
		Needs = StateExact;
}		}

DEBUG(dbgs() << " " << MI);		if (Needs) {
		if (Needs != State) {
		MachineBasicBlock::iterator Before =
		prepareInsertion(MBB, First, II, Needs == StateWQM,
		Needs == StateExact \|\| WQMFromExec);

char Needs = 0;
char OutNeeds = 0;
auto InstrInfoIt = Instructions.find(&MI);
if (InstrInfoIt != Instructions.end()) {
Needs = InstrInfoIt->second.Needs;
OutNeeds = InstrInfoIt->second.OutNeeds;
}

// Keep track of the first consecutive non-WQM instruction, so that we
// switch away from WQM as soon as possible, potentially saving a small
// bit of bandwidth on loads.
if (Needs == StateWQM)
FirstNonWQM = nullptr;
else if (!FirstNonWQM)
FirstNonWQM = &MI;

// State switching
if (Needs && State != Needs) {
if (Needs == StateExact) {		if (Needs == StateExact) {
assert(!SavedWQMReg);

if (!WQMFromExec && (OutNeeds & StateWQM))		if (!WQMFromExec && (OutNeeds & StateWQM))
		arsenmUnsubmitted Not Done Reply Inline Actions I think these should use MI's DebugLoc arsenm: I think these should use MI's DebugLoc
SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);		SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);

toExact(MBB, FirstNonWQM, SavedWQMReg, LiveMaskReg);		toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
} else {		} else {
assert(WQMFromExec == (SavedWQMReg == 0));		assert(WQMFromExec == (SavedWQMReg == 0));
toWQM(MBB, &MI, SavedWQMReg);
		toWQM(MBB, Before, SavedWQMReg);

if (SavedWQMReg) {		if (SavedWQMReg) {
		arsenmUnsubmitted Done Reply Inline Actions Insert through LIS arsenm: Insert through LIS
LIS->createAndComputeVirtRegInterval(SavedWQMReg);		LIS->createAndComputeVirtRegInterval(SavedWQMReg);
SavedWQMReg = 0;		SavedWQMReg = 0;
}		}
		arsenmUnsubmitted Done Reply Inline Actions This can just return Restore arsenm: This can just return Restore
}		}

State = Needs;		State = Needs;
}		}
}

if ((BI.Needs.Out & StateWQM) && State != StateWQM) {		First = IE;
assert(WQMFromExec == (SavedWQMReg == 0));		}
toWQM(MBB, MBB.end(), SavedWQMReg);

if (SavedWQMReg)		if (II == IE)
LIS->createAndComputeVirtRegInterval(SavedWQMReg);		break;
} else if (BI.Needs.Out == StateExact && State != StateExact) {		++II;
toExact(MBB, FirstNonWQM ? MachineBasicBlock::iterator(FirstNonWQM)
: MBB.getFirstTerminator(),
0, LiveMaskReg);
}		}
}		}

void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {		void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
for (MachineInstr *MI : LiveMaskQueries) {		for (MachineInstr *MI : LiveMaskQueries) {
const DebugLoc &DL = MI->getDebugLoc();		const DebugLoc &DL = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();		unsigned Dest = MI->getOperand(0).getReg();
MachineInstr *NewMI =		MachineInstr *NewMI =
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines	if (GlobalFlags != StateWQM) {
for (auto &BII : Blocks)		for (auto &BII : Blocks)
processBlock(const_cast<MachineBasicBlock &>(*BII.first), LiveMaskReg,		processBlock(const_cast<MachineBasicBlock &>(*BII.first), LiveMaskReg,
BII.first == &*MF.begin());		BII.first == &*MF.begin());
}		}

if (LiveMaskReg)		if (LiveMaskReg)
LIS->createAndComputeVirtRegInterval(LiveMaskReg);		LIS->createAndComputeVirtRegInterval(LiveMaskReg);

		// Physical registers like SCC aren't tracked by default anyway, so just
		// removing the ranges we computed is the simplest option for maintaining
		// the analysis results.
		LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));

return true;		return true;
}		}

test/CodeGen/AMDGPU/wqm.ll

Show First 20 Lines • Show All 414 Lines • ▼ Show 20 Lines	end:
%r = phi <4 x float> [ %r.if, %if ], [ %r.else, %else ]		%r = phi <4 x float> [ %r.if, %if ], [ %r.else, %else ]

%gep = getelementptr float, float addrspace(1)* %ptr, i32 %idx		%gep = getelementptr float, float addrspace(1)* %ptr, i32 %idx
store float 1.0, float addrspace(1)* %gep		store float 1.0, float addrspace(1)* %gep

ret <4 x float> %r		ret <4 x float> %r
}		}

		; Test awareness that s_wqm_b64 clobbers SCC.
		arsenm (Unsubmitted, Not Done): I don't see check lines for the cmp + select restore pattern here. Where is the SCC def? This test also probably needs a comment.
		nhaehnle (Author, Unsubmitted, Not Done): That's because I didn't actually manage to concoct a test where the pattern is unavoidable. Usually, the SCC def ends up right before the SCC use, so that the WQM instruction can just be moved around it. I've tested the pattern with some artificial hacks in the code (in the latest version by setting First == Last in prepareInsertion). The problem is that I cannot prove that it will never be needed. For example, it might be needed if the machine scheduler makes unusual decisions about moving store instructions between the SCC def and the SCC use.
		;
		; CHECK-LABEL: {{^}}test_scc:
		; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
		; CHECK: s_wqm_b64 exec, exec
		; CHECK: s_cmp_
		; CHECK-NEXT: s_cbranch_scc
		arsenmUnsubmitted Done Reply Inline Actions CHECK-NEXT for s_cbranch_scc? arsenm: CHECK-NEXT for s_cbranch_scc?
		; CHECK: ; %if
		; CHECK: s_and_b64 exec, exec, [[ORIG]]
		; CHECK: image_sample
		; CHECK: ; %else
		; CHECK: s_and_b64 exec, exec, [[ORIG]]
		; CHECK: image_sample
		; CHECK: ; %end
		define amdgpu_ps <4 x float> @test_scc(float addrspace(1)* inreg %ptr, i32 inreg %sel, i32 %idx) #1 {
		main_body:
		%cc = icmp sgt i32 %sel, 0
		br i1 %cc, label %if, label %else

		if:
		%r.if = call <4 x float> @llvm.SI.image.sample.i32(i32 0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
		br label %end

		else:
		%r.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 0, i32 1>, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
		br label %end

		end:
		%r = phi <4 x float> [ %r.if, %if ], [ %r.else, %else ]

		%gep = getelementptr float, float addrspace(1)* %ptr, i32 %idx
		store float 1.0, float addrspace(1)* %gep

		ret <4 x float> %r
		}

declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1		declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1

declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2		declare <4 x float> @llvm.amdgcn.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2

declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3		declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3		declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3		declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3

declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #3		declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #3
declare void @llvm.AMDGPU.kill(float)		declare void @llvm.AMDGPU.kill(float)
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)		declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #1 = { nounwind }		attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }		attributes #2 = { nounwind readonly }
attributes #3 = { nounwind readnone }		attributes #3 = { nounwind readnone }
attributes #4 = { "amdgpu-ps-wqm-outputs" }		attributes #4 = { "amdgpu-ps-wqm-outputs" }

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Do not clobber SCC in SIWholeQuadMode
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 64855

include/llvm/CodeGen/LiveIntervalAnalysis.h

lib/Target/AMDGPU/SIWholeQuadMode.cpp

test/CodeGen/AMDGPU/wqm.ll

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Do not clobber SCC in SIWholeQuadMode
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 64855

include/llvm/CodeGen/LiveIntervalAnalysis.h

lib/Target/AMDGPU/SIWholeQuadMode.cpp

test/CodeGen/AMDGPU/wqm.ll

AMDGPU: Do not clobber SCC in SIWholeQuadMode
ClosedPublic