This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU: Move permlane discard vdst_in optimization
ClosedPublic

Authored by arsenm on Jan 16 2020, 7:07 AM.

Download Raw Diff

Details

Reviewers

Summary

This case can be handled as a regular selection pattern, so move it
out of the weird post-isel folding code, which has no exact
equivalent in GlobalISel.

I think it doesn't make much sense to do this optimization here
though, and it would be more useful in instcombine. There's not really
any new information that will be gained during lowering since these
inputs were known from the beginning.

Diff Detail

Event Timeline

arsenm created this revision. Jan 16 2020, 7:07 AM

Herald added a project: Restricted Project. · View Herald Transcript Jan 16 2020, 7:07 AM

Herald added subscribers: kerbowa, hiraditya, t-tye and 7 others. · View Herald Transcript

arsenm added a child revision: D72852: AMDGPU/GlobalISel: Select permlane16/permlanex16. Jan 16 2020, 7:56 AM

rampitec added inline comments. Jan 16 2020, 12:13 PM

llvm/lib/Target/AMDGPU/VOP3Instructions.td
682	Don't you need added complexity here? The complexity of both patterns looks the same.

arsenm marked an inline comment as done. Jan 16 2020, 12:20 PM

arsenm added inline comments.

llvm/lib/Target/AMDGPU/VOP3Instructions.td
682	No, I think the custom predicate boosts the complexity score

LGTM

llvm/lib/Target/AMDGPU/VOP3Instructions.td
682	Right, found it here: https://github.com/llvm-mirror/llvm/blob/master/utils/TableGen/CodeGenDAGPatterns.cpp#L1357

This revision is now accepted and ready to land. Jan 16 2020, 12:32 PM

91e758b7329b4ff134684e661af93a85c436a460

foad added a reverting change: D140546: [AMDGPU] Remove permlane discard vdst_in optimization from isel. Dec 22 2022, 4:04 AM

foad added a reverting change: rG7e1e99381650: [AMDGPU] Remove permlane discard vdst_in optimization from isel. Dec 22 2022, 7:52 AM

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

SIISelLowering.cpp

18 lines

VOP3Instructions.td

46 lines

Diff 238489

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 10,367 Lines • ▼ Show 20 Lines	case AMDGPU::V_DIV_SCALE_F64: {

SmallVector<SDValue, 4> Ops = { Src0, Src1, Src2 };		SmallVector<SDValue, 4> Ops = { Src0, Src1, Src2 };
for (unsigned I = 3, N = Node->getNumOperands(); I != N; ++I)		for (unsigned I = 3, N = Node->getNumOperands(); I != N; ++I)
Ops.push_back(Node->getOperand(I));		Ops.push_back(Node->getOperand(I));

Ops.push_back(ImpDef.getValue(1));		Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);		return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}		}
case AMDGPU::V_PERMLANE16_B32:
case AMDGPU::V_PERMLANEX16_B32: {
ConstantSDNode *FI = cast<ConstantSDNode>(Node->getOperand(0));
ConstantSDNode *BC = cast<ConstantSDNode>(Node->getOperand(2));
if (!FI->getZExtValue() && !BC->getZExtValue())
break;
SDValue VDstIn = Node->getOperand(6);
if (VDstIn.isMachineOpcode()
&& VDstIn.getMachineOpcode() == AMDGPU::IMPLICIT_DEF)
break;
MachineSDNode *ImpDef = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
SDLoc(Node), MVT::i32);
SmallVector<SDValue, 8> Ops = { SDValue(FI, 0), Node->getOperand(1),
SDValue(BC, 0), Node->getOperand(3),
Node->getOperand(4), Node->getOperand(5),
SDValue(ImpDef, 0), Node->getOperand(7) };
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
default:		default:
break;		break;
}		}

return Node;		return Node;
}		}

/// Assign the register class depending on the number of		/// Assign the register class depending on the number of
▲ Show 20 Lines • Show All 665 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Show First 20 Lines • Show All 633 Lines • ▼ Show 20 Lines	def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> {
let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,		let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0,
IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1,		IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1,
IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,		IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2,
VGPR_32:$vdst_in, op_sel:$op_sel);		VGPR_32:$vdst_in, op_sel:$op_sel);
let HasClamp = 0;		let HasClamp = 0;
let HasOMod = 0;		let HasOMod = 0;
}		}

		class PermlanePat<SDPatternOperator permlane,
		Instruction inst> : GCNPat<
		(permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2,
		timm:$fi, timm:$bc),
		(inst (as_i1imm $fi), $src0, (as_i1imm $bc),
		$src1, 0, $src2, $vdst_in)
		>;

		// Permlane intrinsic that has either fetch invalid or bound control
		// fields enabled.
		class BoundControlOrFetchInvalidPermlane<SDPatternOperator permlane> :
		PatFrag<(ops node:$vdst_in, node:$src0, node:$src1, node:$src2,
		node:$fi, node:$bc),
		(permlane node:$vdst_in, node:$src0, node:$src1,
		node:$src2, node:$fi, node:$bc)> {
		let PredicateCode = [{ return N->getConstantOperandVal(5) != 0 ||
		N->getConstantOperandVal(6) != 0; }];
		}

		// Drop the input value if it won't be read.
		class PermlaneDiscardVDstIn<SDPatternOperator permlane,
		Instruction inst> : GCNPat<
		(permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
		(inst (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2,
		(IMPLICIT_DEF))
		>;


let SubtargetPredicate = isGFX10Plus in {		let SubtargetPredicate = isGFX10Plus in {
def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;		def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32>;		def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32>;

let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {		let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_PERMLANE16_B32 : VOP3Inst <"v_permlane16_b32", VOP3_PERMLANE_Profile>;		def V_PERMLANE16_B32 : VOP3Inst <"v_permlane16_b32", VOP3_PERMLANE_Profile>;
def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>;		def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
} // End $vdst = $vdst_in, DisableEncoding $vdst_in		} // End $vdst = $vdst_in, DisableEncoding $vdst_in

def : GCNPat<		def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32>;
(int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),		def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32>;
(V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
>;		def : PermlaneDiscardVDstIn<
		rampitecUnsubmitted Not Done Reply Inline Actions Don't you need added complexity here? The complexity of both patterns looks the same. rampitec: Don't you need added complexity here? The complexity of both patterns looks the same.
		arsenmAuthorUnsubmitted Done Reply Inline Actions No, I think the custom predicate boosts the complexity score arsenm: No, I think the custom predicate boosts the complexity score
		rampitecUnsubmitted Not Done Reply Inline Actions Right, found it here: https://github.com/llvm-mirror/llvm/blob/master/utils/TableGen/CodeGenDAGPatterns.cpp#L1357 rampitec: Right, found it here: https://github.com/llvm-mirror/llvm/blob/master/utils/TableGen/CodeGenDAG…
def : GCNPat<		BoundControlOrFetchInvalidPermlane<int_amdgcn_permlane16>,
(int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),		V_PERMLANE16_B32>;
(V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)		def : PermlaneDiscardVDstIn<
>;		BoundControlOrFetchInvalidPermlane<int_amdgcn_permlanex16>,
		V_PERMLANEX16_B32>;

} // End SubtargetPredicate = isGFX10Plus		} // End SubtargetPredicate = isGFX10Plus

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Integer Clamp Patterns		// Integer Clamp Patterns
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

class getClampPat<VOPProfile P, SDPatternOperator node> {		class getClampPat<VOPProfile P, SDPatternOperator node> {
dag ret3 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2));		dag ret3 = (P.DstVT (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2));
▲ Show 20 Lines • Show All 476 Lines • Show Last 20 Lines