This is an archive of the discontinued LLVM Phabricator instance.

lib/Target/AMDGPU/SIISelLowering.cpp
3525–3527 ↗	(On Diff #75257)	I think calling simplifyDemandedBits either here or in performUCharToFloatCombine will eliminate the need for this code.
3560 ↗	(On Diff #75257)	Never mind, I see that this transform is correct, because performUCharToFloatCombine() is checking that the high bits are all zero.

Use getZExtOrTrunc

Herald edited edge metadata. · View Herald Transcript · Oct 21 2016, 10:46 AM

kzhuravl added inline comments. · Oct 21 2016, 10:46 AM

lib/Target/AMDGPU/SIISelLowering.cpp
3525–3527 ↗	(On Diff #75257)	In this case, simplifyDemandedBits will replace zero_extend with any_extend.

Only check for zero_extend

Herald edited edge metadata. · View Herald Transcript · Oct 21 2016, 12:08 PM

LGTM. Can you mention in the commit message that this will prevent a regression when enabling i16 support, and note the test that would regress?

This revision is now accepted and ready to land. · Oct 21 2016, 1:09 PM

kzhuravl updated this object. · Oct 21 2016, 3:11 PM

kzhuravl edited edge metadata.

Closed by commit rL284891: [AMDGPU] Perform uchar to float combine for ISD::SINT_TO_FP (authored by kzhuravl). · Explain Why · Oct 21 2016, 3:19 PM

This revision was automatically updated to reflect the committed changes.

arsenm added inline comments. · Oct 26 2016, 12:41 PM

lib/Target/AMDGPU/SIISelLowering.cpp
3560 ↗	(On Diff #75257)	That checks if the high 24 bits are zero. For the signed case it needs to check if 25 bits are 0, so it is still incorrect.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

AMDGPU/

SIISelLowering.cpp

17 lines

Diff 75497

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

Show First 20 Lines • Show All 227 Lines • ▼ Show 20 Lines	SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SMIN);		setTargetDAGCombine(ISD::SMIN);
setTargetDAGCombine(ISD::SMAX);		setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMIN);		setTargetDAGCombine(ISD::UMIN);
setTargetDAGCombine(ISD::UMAX);		setTargetDAGCombine(ISD::UMAX);
setTargetDAGCombine(ISD::SETCC);		setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::AND);		setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);		setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);		setTargetDAGCombine(ISD::XOR);
		setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);		setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::FCANONICALIZE);		setTargetDAGCombine(ISD::FCANONICALIZE);

// All memory operations. Some folding on the pointer operand is done to help		// All memory operations. Some folding on the pointer operand is done to help
// matching the constant offsets in the addressing modes.		// matching the constant offsets in the addressing modes.
setTargetDAGCombine(ISD::LOAD);		setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);		setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ATOMIC_LOAD);		setTargetDAGCombine(ISD::ATOMIC_LOAD);
▲ Show 20 Lines • Show All 3,271 Lines • ▼ Show 20 Lines	case AMDGPUISD::FMAX_LEGACY: {
break;		break;
}		}

case AMDGPUISD::CVT_F32_UBYTE0:		case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:		case AMDGPUISD::CVT_F32_UBYTE1:
case AMDGPUISD::CVT_F32_UBYTE2:		case AMDGPUISD::CVT_F32_UBYTE2:
case AMDGPUISD::CVT_F32_UBYTE3: {		case AMDGPUISD::CVT_F32_UBYTE3: {
unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;		unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;

SDValue Src = N->getOperand(0);		SDValue Src = N->getOperand(0);
		SDValue Srl = N->getOperand(0);
		if (Srl.getOpcode() == ISD::ZERO_EXTEND)
		Srl = Srl.getOperand(0);

// TODO: Handle (or x, (srl y, 8)) pattern when known bits are zero.		// TODO: Handle (or x, (srl y, 8)) pattern when known bits are zero.
if (Src.getOpcode() == ISD::SRL) {		if (Srl.getOpcode() == ISD::SRL) {
// cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x		// cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x
// cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x		// cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
// cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x		// cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x

if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {		if (const ConstantSDNode *C =
		dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
		Srl = DAG.getZExtOrTrunc(Srl.getOperand(0), SDLoc(Srl.getOperand(0)),
		EVT(MVT::i32));

unsigned SrcOffset = C->getZExtValue() + 8 * Offset;		unsigned SrcOffset = C->getZExtValue() + 8 * Offset;
if (SrcOffset < 32 && SrcOffset % 8 == 0) {		if (SrcOffset < 32 && SrcOffset % 8 == 0) {
return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + SrcOffset / 8, DL,		return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + SrcOffset / 8, DL,
MVT::f32, Src.getOperand(0));		MVT::f32, Srl);
}		}
}		}
}		}

APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);		APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);

APInt KnownZero, KnownOne;		APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),		TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());		!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();		const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLO.ShrinkDemandedConstant(Src, Demanded) \|\|		if (TLO.ShrinkDemandedConstant(Src, Demanded) \|\|
TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {		TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);		DCI.CommitTargetLoweringOpt(TLO);
}		}

break;		break;
}		}
		case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: {		case ISD::UINT_TO_FP: {
return performUCharToFloatCombine(N, DCI);		return performUCharToFloatCombine(N, DCI);
}		}
case ISD::FADD: {		case ISD::FADD: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)		if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
break;		break;

EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
▲ Show 20 Lines • Show All 508 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Perform uchar to float combine for ISD::SINT_TO_FP · Closed · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 75497

llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp

[AMDGPU] Perform uchar to float combine for ISD::SINT_TO_FP
ClosedPublic