This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Only use legal inline immediates with kill pseudo
ClosedPublic

Authored by arsenm on Jul 7 2016, 12:01 PM.

Download Raw Diff

Details

Reviewers

• tstellarAMD
nhaehnle
arsenm

Summary

Only whether the value is negative or positive matters,
so use a constant that doesn't require an instruction to
materialize.

Diff Detail

Event Timeline

arsenm updated this revision to Diff 63112. · Jul 7 2016, 12:01 PM

arsenm retitled this revision to "AMDGPU: Only use legal inline immediates with kill pseudo".

arsenm updated this object.

arsenm added a reviewer: nhaehnle.

arsenm added a subscriber: llvm-commits.

Herald added a reviewer: • tstellarAMD. · View Herald Transcript · Jul 7 2016, 12:01 PM

Herald added subscribers: kzhuravl, arsenm. · View Herald Transcript

more tests

I don't understand why changing the type to i32 is necessary. Isn't -1.0 also a legal inline immediate practically everywhere?

In D22108#477764, @nhaehnle wrote:

I don't understand why changing the type to i32 is necessary. Isn't -1.0 also a legal inline immediate practically everywhere?

It's not necessary; it's just more natural to express a boolean with an integer.

I agree, but the D3D designers in their infinite wisdom designed a KILL opcode with a floating point parameter, which we inherited via TGSI in Mesa, which in turn motivated this (indeed somewhat questionable) intrinsic.

Hmm. My concern is that (non-constant) KILL gets lowered to a floating point comparison, which treats NaNs specially. (i32)-1 is a NaN. I admit that it doesn't really matter since this patch uses a constant, but it still leaves me with an uncomfortable feeling.

In D22108#478244, @nhaehnle wrote:

I agree, but the D3D designers in their infinite wisdom designed a KILL opcode with a floating point parameter, which we inherited via TGSI in Mesa, which in turn motivated this (indeed somewhat questionable) intrinsic.

Hmm. My concern is that (non-constant) KILL gets lowered to a floating point comparison, which treats NaNs specially. (i32)-1 is a NaN. I admit that it doesn't really matter since this patch uses a constant, but it still leaves me with an uncomfortable feeling.

The type of the intrinsic itself doesn't change; this is just an internal implementation detail.

Use -1.0

ping

I forgot about this patch. I investigated our kill code generation some more, and it turns out that changing the "default" kill intrinsic to take an i1 instead of a float leads to better code anyway.

So if this change here is urgent to you for some reason, just go ahead and commit as-is and I'll rebase my changes on top of it, otherwise I'll soon post a change that should end up having the same effect.

In D22108#488377, @nhaehnle wrote:

I forgot about this patch. I investigated our kill code generation some more, and it turns out that changing the "default" kill intrinsic to take an i1 instead of a float leads to better code anyway.

So if this change here is urgent to you for some reason, just go ahead and commit as-is and I'll rebase my changes on top of it, otherwise I'll soon post a change that should end up having the same effect.

Removing the kilp intrinsic would also be nice

Yes, I'm working on a combination of patches for LLVM and Mesa that replaces both llvm.AMDGPU.kill and llvm.amdgpu.kilp with an llvm.amdgcn.kill(i1) intrinsic, where the argument is true if the thread should be killed.

r275988

This revision is now accepted and ready to land. · Jul 19 2016, 9:46 AM

arsenm closed this revision. · Jul 19 2016, 9:46 AM

Revision Contents

Path

Size

lib/

Target/

AMDGPU/

AMDGPUISelLowering.h

1 line

AMDGPUISelLowering.cpp

1 line

AMDGPUInstrInfo.td

5 lines

SIISelLowering.cpp

9 lines

SIInstructions.td

2 lines

Diff 63780

lib/Target/AMDGPU/AMDGPUISelLowering.h

Show First 20 Lines • Show All 297 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
BUILD_VERTICAL_VECTOR,		BUILD_VERTICAL_VECTOR,
/// Pointer to the start of the shader's constant data.		/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,		CONST_DATA_PTR,
SENDMSG,		SENDMSG,
INTERP_MOV,		INTERP_MOV,
INTERP_P1,		INTERP_P1,
INTERP_P2,		INTERP_P2,
PC_ADD_REL_OFFSET,		PC_ADD_REL_OFFSET,
		KILL,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,		FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,		STORE_MSKOR,
LOAD_CONSTANT,		LOAD_CONSTANT,
TBUFFER_STORE_FORMAT,		TBUFFER_STORE_FORMAT,
ATOMIC_CMP_SWAP,		ATOMIC_CMP_SWAP,
ATOMIC_INC,		ATOMIC_INC,
ATOMIC_DEC,		ATOMIC_DEC,
LAST_AMDGPU_ISD_NUMBER		LAST_AMDGPU_ISD_NUMBER
};		};


} // End namespace AMDGPUISD		} // End namespace AMDGPUISD

} // End namespace llvm		} // End namespace llvm

#endif		#endif

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Show First 20 Lines • Show All 2,865 Lines • ▼ Show 20 Lines	const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SAMPLEL)		NODE_NAME_CASE(SAMPLEL)
NODE_NAME_CASE(CVT_F32_UBYTE0)		NODE_NAME_CASE(CVT_F32_UBYTE0)
NODE_NAME_CASE(CVT_F32_UBYTE1)		NODE_NAME_CASE(CVT_F32_UBYTE1)
NODE_NAME_CASE(CVT_F32_UBYTE2)		NODE_NAME_CASE(CVT_F32_UBYTE2)
NODE_NAME_CASE(CVT_F32_UBYTE3)		NODE_NAME_CASE(CVT_F32_UBYTE3)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)		NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)		NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)		NODE_NAME_CASE(PC_ADD_REL_OFFSET)
		NODE_NAME_CASE(KILL)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;		case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)		NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(INTERP_MOV)		NODE_NAME_CASE(INTERP_MOV)
NODE_NAME_CASE(INTERP_P1)		NODE_NAME_CASE(INTERP_P1)
NODE_NAME_CASE(INTERP_P2)		NODE_NAME_CASE(INTERP_P2)
NODE_NAME_CASE(STORE_MSKOR)		NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(LOAD_CONSTANT)		NODE_NAME_CASE(LOAD_CONSTANT)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)		NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
▲ Show 20 Lines • Show All 119 Lines • Show Last 20 Lines

lib/Target/AMDGPU/AMDGPUInstrInfo.td

Show All 34 Lines	def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]		[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;		>;

// float, float, float, vcc		// float, float, float, vcc
def AMDGPUFmasOp : SDTypeProfile<1, 4,		def AMDGPUFmasOp : SDTypeProfile<1, 4,
[SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]		[SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
>;		>;

		def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes		// AMDGPU DAG Nodes
//		//

def AMDGPUconstdata_ptr : SDNode<		def AMDGPUconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,		"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
SDTCisVT<0, iPTR>]>		SDTCisVT<0, iPTR>]>
>;		>;
▲ Show 20 Lines • Show All 189 Lines • ▼ Show 20 Lines
def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",		def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
SDTypeProfile<1, 3, [SDTCisFP<0>]>,		SDTypeProfile<1, 3, [SDTCisFP<0>]>,
[SDNPInGlue, SDNPOutGlue]>;		[SDNPInGlue, SDNPOutGlue]>;

def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",		def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
SDTypeProfile<1, 4, [SDTCisFP<0>]>,		SDTypeProfile<1, 4, [SDTCisFP<0>]>,
[SDNPInGlue]>;		[SDNPInGlue]>;

		def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
		[SDNPHasChain, SDNPSideEffect]>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Flow Control Profile Types		// Flow Control Profile Types
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Branch instruction where second and third are basic blocks		// Branch instruction where second and third are basic blocks
def SDTIL_BRCond : SDTypeProfile<0, 2, [		def SDTIL_BRCond : SDTypeProfile<0, 2, [
SDTCisVT<0, OtherVT>		SDTCisVT<0, OtherVT>
]>;		]>;

Show All 13 Lines

lib/Target/AMDGPU/SIISelLowering.cpp

Show First 20 Lines • Show All 2,277 Lines • ▼ Show 20 Lines	case AMDGPUIntrinsic::SI_tbuffer_store: {
MachineMemOperand *MMO = MF.getMachineMemOperand(		MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),		MachinePointerInfo(),
MachineMemOperand::MOStore,		MachineMemOperand::MOStore,
VT.getStoreSize(), 4);		VT.getStoreSize(), 4);
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,		return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
Op->getVTList(), Ops, VT, MMO);		Op->getVTList(), Ops, VT, MMO);
}		}
case AMDGPUIntrinsic::AMDGPU_kill: {		case AMDGPUIntrinsic::AMDGPU_kill: {
if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Op.getOperand(2))) {		SDValue Src = Op.getOperand(2);
		if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
if (!K->isNegative())		if (!K->isNegative())
return Chain;		return Chain;

		SDValue NegOne = DAG.getTargetConstant(FloatToBits(-1.0f), DL, MVT::i32);
		return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, NegOne);
}		}

return Op;		SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
		return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
}		}
default:		default:
return SDValue();		return SDValue();
}		}
}		}

SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {		SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);		SDLoc DL(Op);
▲ Show 20 Lines • Show All 1,462 Lines • Show Last 20 Lines

lib/Target/AMDGPU/SIInstructions.td

Show First 20 Lines • Show All 1,990 Lines • ▼ Show 20 Lines	def SI_END_CF : PseudoInstSI <
[(int_amdgcn_end_cf i64:$saved)]		[(int_amdgcn_end_cf i64:$saved)]
>;		>;

} // End Uses = [EXEC], Defs = [EXEC, SCC]		} // End Uses = [EXEC], Defs = [EXEC, SCC]

let Uses = [EXEC], Defs = [EXEC,VCC] in {		let Uses = [EXEC], Defs = [EXEC,VCC] in {
def SI_KILL : PseudoInstSI <		def SI_KILL : PseudoInstSI <
(outs), (ins VSrc_32:$src),		(outs), (ins VSrc_32:$src),
[(int_AMDGPU_kill f32:$src)]> {		[(AMDGPUkill i32:$src)]> {
let isConvergent = 1;		let isConvergent = 1;
let usesCustomInserter = 1;		let usesCustomInserter = 1;
}		}

def SI_KILL_TERMINATOR : PseudoInstSI <		def SI_KILL_TERMINATOR : PseudoInstSI <
(outs), (ins VSrc_32:$src)> {		(outs), (ins VSrc_32:$src)> {
let isTerminator = 1;		let isTerminator = 1;
}		}
▲ Show 20 Lines • Show All 1,565 Lines • Show Last 20 Lines