Diff 548050

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

Show First 20 Lines • Show All 102 Lines • ▼ Show 20 Lines	static bool isVSlideInstr(const MachineInstr &MI) {
case RISCV::VSLIDEDOWN_VX:		case RISCV::VSLIDEDOWN_VX:
case RISCV::VSLIDEDOWN_VI:		case RISCV::VSLIDEDOWN_VI:
case RISCV::VSLIDEUP_VX:		case RISCV::VSLIDEUP_VX:
case RISCV::VSLIDEUP_VI:		case RISCV::VSLIDEUP_VI:
return true;		return true;
}		}
}		}

		/// Return true if the opcode of \p MI, once mapped through getRVVMCOpcode,
		/// is RISCV::VCOMPRESS_VM (i.e. the instruction is a vcompress.vm).
		static bool isCompressInstr(const MachineInstr &MI) {
		  const auto MCOpcode = getRVVMCOpcode(MI.getOpcode());
		  return MCOpcode == RISCV::VCOMPRESS_VM;
		}

		/// Return true if the opcode of \p MI, once mapped through getRVVMCOpcode,
		/// is one of the vector reduction opcodes enumerated below, and false for
		/// every other opcode.
		static bool isReductionInstr(const MachineInstr &MI) {
		  const auto MCOpcode = getRVVMCOpcode(MI.getOpcode());
		  switch (MCOpcode) {
		  // Integer reductions.
		  case RISCV::VREDSUM_VS:
		  case RISCV::VREDMAXU_VS:
		  case RISCV::VREDMAX_VS:
		  case RISCV::VREDMINU_VS:
		  case RISCV::VREDMIN_VS:
		  case RISCV::VREDAND_VS:
		  case RISCV::VREDOR_VS:
		  case RISCV::VREDXOR_VS:
		  // Widening integer reductions.
		  case RISCV::VWREDSUMU_VS:
		  case RISCV::VWREDSUM_VS:
		  // Floating-point reductions.
		  case RISCV::VFREDOSUM_VS:
		  case RISCV::VFREDUSUM_VS:
		  case RISCV::VFREDMIN_VS:
		  case RISCV::VFREDMAX_VS:
		  // Widening floating-point reductions.
		  case RISCV::VFWREDOSUM_VS:
		  case RISCV::VFWREDUSUM_VS:
		    return true;
		  default:
		    return false;
		  }
		}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is		/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.		/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {		static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
switch (getRVVMCOpcode(MI.getOpcode())) {		switch (getRVVMCOpcode(MI.getOpcode())) {
default:		default:
return std::nullopt;		return std::nullopt;
case RISCV::VLE8_V:		case RISCV::VLE8_V:
case RISCV::VLSE8_V:		case RISCV::VLSE8_V:
▲ Show 20 Lines • Show All 220 Lines • ▼ Show 20 Lines	DemandedFields getDemanded(const MachineInstr &MI,
}		}

// Store instructions don't use the policy fields.		// Store instructions don't use the policy fields.
if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {		if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
Res.TailPolicy = false;		Res.TailPolicy = false;
Res.MaskPolicy = false;		Res.MaskPolicy = false;
}		}

// If this is a mask reg operation, it only cares about VLMAX.		// If this is a mask reg operation, it only cares about VLMAX.
		craig.topperUnsubmitted Done Reply Inline Actions This is incorrect for vcompress, reductions, and vmv.s.x. craig.topper: This is incorrect for vcompress, reductions, and vmv.s.x.
		jacquesguanAuthorUnsubmitted Done Reply Inline Actions Done, exclude these instructions. jacquesguan: Done, exclude these instructions.
// TODO: Possible extensions to this logic		// TODO: Possible extensions to this logic
// * Probably ok if available VLMax is larger than demanded		// * Probably ok if available VLMax is larger than demanded
// * The policy bits can probably be ignored..		// * The policy bits can probably be ignored..
if (isMaskRegOp(MI)) {		if (isMaskRegOp(MI)) {
Res.SEW = DemandedFields::SEWNone;		Res.SEW = DemandedFields::SEWNone;
		lukeUnsubmitted Done Reply Inline Actions The VL operand can be stale here because this is called from doLocalPostpass, after the pseudo has been expanded and the vsetvlis are inserted. Is it possible to move this into needVSETVLI? luke: The VL operand can be stale here because this is called from doLocalPostpass, after the pseudo…
		jacquesguanAuthorUnsubmitted Done Reply Inline Actions Done, moved to `needVSETVLI`. jacquesguan: Done, moved to `needVSETVLI`.
Res.LMUL = false;		Res.LMUL = false;
}		}

// For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.		// For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
if (isScalarMoveInstr(MI)) {		if (isScalarMoveInstr(MI)) {
Res.LMUL = false;		Res.LMUL = false;
Res.SEWLMULRatio = false;		Res.SEWLMULRatio = false;
Res.VLAny = false;		Res.VLAny = false;
▲ Show 20 Lines • Show All 423 Lines • ▼ Show 20 Lines
#ifndef NDEBUG		#ifndef NDEBUG
if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {		if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
assert(SEW == EEW && "Initial SEW doesn't match expected EEW");		assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
}		}
#endif		#endif
InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);		InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

return InstrInfo;		return InstrInfo;
}		}
		craig.topperUnsubmitted Done Reply Inline Actions `!decodeVLMUL(VLMul).second` craig.topper: `!decodeVLMUL(VLMul).second`

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,		void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
const VSETVLIInfo &Info,		const VSETVLIInfo &Info,
const VSETVLIInfo &PrevInfo) {		const VSETVLIInfo &PrevInfo) {
DebugLoc DL = MI.getDebugLoc();		DebugLoc DL = MI.getDebugLoc();
insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);		insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}		}

▲ Show 20 Lines • Show All 128 Lines • ▼ Show 20 Lines	bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&		if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {		isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
Used.VLAny = false;		Used.VLAny = false;
Used.VLZeroness = true;		Used.VLZeroness = true;
Used.LMUL = false;		Used.LMUL = false;
Used.TailPolicy = false;		Used.TailPolicy = false;
}		}

		// For most instructions, the tail elements are defined as:
		//   tail(x) = (vl <= x < max(VLMAX, VLEN/SEW))
		// When LMUL is not fractional, VLMAX >= VLEN/SEW, so the upper bound is
		// VLMAX. If the AVL is also VLMAX, then vl == VLMAX and there are no tail
		// elements, so the instruction does not need the tail policy.
		if (!isScalarMoveInstr(MI) && !isReductionInstr(MI) && !isCompressInstr(MI))
		if (RISCVII::hasVLOp(MI.getDesc().TSFlags)) {
		RISCVII::VLMUL VLMul = RISCVII::getLMul(MI.getDesc().TSFlags);
		// Fractional LMULs always require tail policy.
		if (VLMul < RISCVII::LMUL_RESERVED) {
		craig.topperUnsubmitted Done Reply Inline Actions `!decodeVLMUL(VLMul).second` craig.topper: `!decodeVLMUL(VLMul).second`
		const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
		if (VLOp.isImm()) {
		int64_t Imm = VLOp.getImm();
		if (Imm == RISCV::VLMaxSentinel)
		Used.TailPolicy = false;
		} else if (VLOp.getReg() == RISCV::X0) {
		Used.TailPolicy = false;
		}
		}
		}

// A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated semantically		// A tail undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated semantically
// the same as vmv.s.x. This is particularly useful since we don't have an		// the same as vmv.s.x. This is particularly useful since we don't have an
// immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.		// immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
// Since a splat is non-constant time in LMUL, we do need to be careful to not		// Since a splat is non-constant time in LMUL, we do need to be careful to not
// increase the number of active vector registers (unlike for vmv.s.x.)		// increase the number of active vector registers (unlike for vmv.s.x.)
if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&		if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {		isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
Used.LMUL = false;		Used.LMUL = false;
▲ Show 20 Lines • Show All 639 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll

	Show First 20 Lines • Show All 379 Lines • ▼ Show 20 Lines

	; This shouldn't be folded because we need to preserve exceptions with			; This shouldn't be folded because we need to preserve exceptions with
	; "fpexcept.strict" exception behaviour, and masking may hide them.			; "fpexcept.strict" exception behaviour, and masking may hide them.
	define <vscale x 2 x float> @vpmerge_constrained_fadd_vlmax(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m) strictfp {			define <vscale x 2 x float> @vpmerge_constrained_fadd_vlmax(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m) strictfp {
	; CHECK-LABEL: vpmerge_constrained_fadd_vlmax:			; CHECK-LABEL: vpmerge_constrained_fadd_vlmax:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma			; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma
	; CHECK-NEXT: vfadd.vv v9, v9, v10			; CHECK-NEXT: vfadd.vv v9, v9, v10
	; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma
	; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0			; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp			%a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
	%b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 -1) strictfp			%b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 -1) strictfp
	ret <vscale x 2 x float> %b			ret <vscale x 2 x float> %b
	}			}

	; Test conversion by fptosi.			; Test conversion by fptosi.
	▲ Show 20 Lines • Show All 671 Lines • Show Last 20 Lines

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll

Show First 20 Lines • Show All 573 Lines • ▼ Show 20 Lines	entry:
%2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(		%2 = tail call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
<vscale x 1 x double> undef,		<vscale x 1 x double> undef,
<vscale x 1 x double> %1,		<vscale x 1 x double> %1,
<vscale x 1 x double> %c,		<vscale x 1 x double> %c,
i64 7, i64 %0)		i64 7, i64 %0)
ret <vscale x 1 x double> %2		ret <vscale x 1 x double> %2
}		}

		define <vscale x 1 x i64> @test21(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, <vscale x 1 x i1> %mask) nounwind {
		; CHECK-LABEL: test21:
		; CHECK: # %bb.0: # %entry
		; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, mu
		craig.topperUnsubmitted Done Reply Inline Actions Wouldn't we want ta instead of tu since there is no tail? craig.topper: Wouldn't we want ta instead of tu since there is no tail?
		jacquesguanAuthorUnsubmitted Done Reply Inline Actions Done. jacquesguan: Done.
		; CHECK-NEXT: vadd.vv v8, v8, v8, v0.t
		; CHECK-NEXT: vadd.vv v9, v9, v8, v0.t
		; CHECK-NEXT: vmv1r.v v8, v9
		; CHECK-NEXT: ret
		entry:
		%x = call <vscale x 1 x i64> @llvm.riscv.vadd.mask.nxv1i64.nxv1i64(
		<vscale x 1 x i64> %a,
		<vscale x 1 x i64> %a,
		<vscale x 1 x i64> %a,
		<vscale x 1 x i1> %mask,
		i64 -1,
		i64 0)
		%y = call <vscale x 1 x i64> @llvm.riscv.vadd.mask.nxv1i64.nxv1i64(
		<vscale x 1 x i64> %b,
		<vscale x 1 x i64> %b,
		<vscale x 1 x i64> %x,
		<vscale x 1 x i1> %mask,
		i64 -1,
		i64 1)
		ret <vscale x 1 x i64> %y
		}

; This used to fail the machine verifier due to the vsetvli being removed		; This used to fail the machine verifier due to the vsetvli being removed
; while the add was still using it.		; while the add was still using it.
define i64 @bad_removal(<2 x i64> %arg) {		define i64 @bad_removal(<2 x i64> %arg) {
; CHECK-LABEL: bad_removal:		; CHECK-LABEL: bad_removal:
; CHECK: # %bb.0: # %bb		; CHECK: # %bb.0: # %bb
; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma		; CHECK-NEXT: vsetivli zero, 16, e64, m1, ta, ma
; CHECK-NEXT: vmv.x.s a0, v8		; CHECK-NEXT: vmv.x.s a0, v8
; CHECK-NEXT: vsetivli a1, 16, e64, m1, ta, ma		; CHECK-NEXT: vsetivli a1, 16, e64, m1, ta, ma
▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] Teach VSETVLIInserter to not demand tail policy when there is no tail element
Needs RevisionPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 548050

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll

This is an archive of the discontinued LLVM Phabricator instance.

[RISCV] Teach VSETVLIInserter to not demand tail policy when there is no tail elementNeeds RevisionPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 548050

llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp

llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll

llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll

[RISCV] Teach VSETVLIInserter to not demand tail policy when there is no tail element
Needs RevisionPublic