A vmv.v.v can be thought of as a vmerge where, instead of masking with a mask
operand, the tail is masked off by VL. For example, in the sequence below the
vmv.v.v copies over the first 2 elements from the vadd.vv:
vsetivli zero, 4, e32, m1, ta, ma
vadd.vv v9, v10, v11
vsetivli zero, 2, e32, m1, tu, ma
vmv.v.v v8, v9
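To make the equivalence concrete, here is a small sketch (not code from the patch; the helper names and register values are illustrative) modelling the tail-undisturbed vmv.v.v as a copy of the first VL elements that leaves the destination's tail alone:

```python
def vmv_v_v_tu(vd, vs, vl):
    """Model of a tail-undisturbed vmv.v.v: copy the first vl elements
    of vs into vd, leaving vd's remaining (tail) elements untouched."""
    return vs[:vl] + vd[vl:]

v10 = [1, 2, 3, 4]
v11 = [10, 20, 30, 40]
v8_old = [0, 0, 0, 0]

# Original sequence: vadd.vv at VL=4, then vmv.v.v at VL=2 (tu).
v9 = [a + b for a, b in zip(v10, v11)]
original = vmv_v_v_tu(v8_old, v9, 2)

# Folded sequence: vadd.vv written straight into v8 at VL=2,
# with the tail modelled as undisturbed.
folded = [v10[i] + v11[i] if i < 2 else v8_old[i] for i in range(4)]

assert original == folded  # both are [11, 22, 0, 0]
```

The assertion holds because only the first VL elements are observable through the vmv.v.v; whatever the producing op computed past that point is dead.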
This patch adds an optimisation to fold away the vmv.v.v into the preceding op,
provided the op has only one use, by modifying the op's VL:
vsetivli zero, 2, e32, m1, ta, ma
vadd.vv v8, v10, v11
In general we can simply replace the VL of the op with the VL of the vmv.v.v
(unless the op is a load, in which case we make sure the new VL is less than
or equal to the original, so we don't end up loading more elements than
before).
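The load restriction can be sketched as follows (illustrative only; the function name and shape are assumptions, not the patch's actual code): for a load we only ever shrink VL, since growing it would read memory the original program never touched.

```python
def folded_vl(producer_vl, vmv_vl, producer_is_load):
    """Pick the VL to use on the producer after folding the vmv.v.v,
    or None if the fold must be abandoned."""
    if producer_is_load and vmv_vl > producer_vl:
        # A larger VL would load elements the original code didn't;
        # that could fault or read past a valid buffer, so give up.
        return None
    # Otherwise the vmv.v.v's VL bounds everything observable.
    return vmv_vl

assert folded_vl(4, 2, producer_is_load=True) == 2
assert folded_vl(2, 4, producer_is_load=True) is None
assert folded_vl(2, 4, producer_is_load=False) == 4
```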
The actual optimisation shares largely the same structure as
performCombineVMergeAndVOps: I've just duplicated the code for now, but if
people want I could try to abstract some of the shared bits away.
Why aren't we using RISCVII::hasVLOp if we want to know whether there is a VL
operand?