This is an archive of the discontinued LLVM Phabricator instance.

Differential D3605

[ARM64/AArch64] Port NEON post-increment load/store with 2/3/4 vectors to ARM64 backend
Needs ReviewPublic

Authored by • HaoLiu on May 5 2014, 1:18 AM.

Download Raw Diff

Details

Reviewers

t.p.northover

Summary

Hi Tim and other reviewers,

This patch ports all NEON post-increment loads/stores with 2/3/4 vectors to the ARM64 backend, including the following post-increment instructions:
LD1 - Load multiple 1-element structures to two, three or four consecutive registers
LD2 - Load multiple 2-element structures to two consecutive registers
LD3 - Load multiple 3-element structures to three consecutive registers
LD4 - Load multiple 4-element structures to four consecutive registers
LD2 - Load single 2-element structure to one lane of two consecutive registers
LD3 - Load single 3-element structure to one lane of three consecutive registers
LD4 - Load single 4-element structure to one lane of four consecutive registers
LD2R - Load single 2-element structure and replicate to all lanes of two registers
LD3R - Load single 3-element structure and replicate to all lanes of three registers
LD4R - Load single 4-element structure and replicate to all lanes of four registers
ST1 - Store multiple 1-element structures from two, three or four consecutive registers
ST2 - Store multiple 2-element structures from two consecutive registers
ST3 - Store multiple 3-element structures from three consecutive registers
ST4 - Store multiple 4-element structures from four consecutive registers
ST2 - Store single 2-element structure from one lane of two consecutive registers
ST3 - Store single 3-element structure from one lane of three consecutive registers
ST4 - Store single 4-element structure from one lane of four consecutive registers

BTW, I think the implementation in ARM64DAGToDAGISel::Select has some redundancy. For every intrinsic/ISD node, it compares the type against 12 types from v16i8 to v2f64 and calls the corresponding select function, such as SelectLoad or SelectStore. If we called SelectLoad/SelectStore directly and compared the types inside them, we could reduce some code. Anyway, this is only about code structure and has nothing to do with correctness, so I haven't modified it. I just call SelectPostLoad in the same way that SelectLoad is called.

Code review, please.

Thanks,
-Hao

Diff Detail

Event Timeline

• HaoLiu updated this revision to Diff 9064.May 5 2014, 1:18 AM

• HaoLiu retitled this revision from to [ARM64/AArch64] Port NEON post-increment load/store with 2/3/4 vectors to ARM64 backend.

• HaoLiu updated this object.

• HaoLiu edited the test plan for this revision. (Show Details)

• HaoLiu added a reviewer: t.p.northover.

• HaoLiu added a subscriber: Unknown Object (MLST).

Herald added a subscriber: aemerson. · View Herald TranscriptMay 5 2014, 1:18 AM

Hi Hao,

Thanks for working on this. I've got some comments, mostly extremely minor nits (I can't see anything actually wrong with the code).

I do agree about that repeated type checking. It's rather untidy, but it's in the style of the surrounding code so we can probably deal with that separately.

Cheers.

Tim.

lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
1068	Commented out code.
1219	Wouldn't this be neater as a for loop? static unsigned QSubs[] = { ARM64::qsub0, ARM64::qsub1, ARM64::qsub2, ARM64::qsub3 }; for (int i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubReg(QSubs[i], dl, WideVT, SuperReg); if (Narrow) NV = NarrowVector(NV, *CurDAG); ReplaceUses(SDValue(N, i), NV); } (Untested).
lib/Target/ARM64/ARM64ISelLowering.cpp
7017–7019	Functions usually start with a lower-case letter, and indentation is a bit wonky here.

Hi Tim,

This patch has been refactored according to your comments and has been committed in http://llvm.org/viewvc/llvm-project?view=revision&revision=208284.

Thanks,
-Hao

Revision Contents

Path

Size

lib/

Target/

ARM64/

ARM64ISelDAGToDAG.cpp

562 lines

ARM64ISelLowering.h

25 lines

ARM64ISelLowering.cpp

190 lines

test/

CodeGen/

ARM64/

indexed-vector-ldst.ll

5077 lines

Diff 9064

lib/Target/ARM64/ARM64ISelDAGToDAG.cpp

Context not available.

	SDNode SelectLoad(SDNode N, unsigned NumVecs, unsigned Opc,	SDNode SelectLoad(SDNode N, unsigned NumVecs, unsigned Opc,
	unsigned SubRegIdx);	unsigned SubRegIdx);
		SDNode SelectPostLoad(SDNode N, unsigned NumVecs, unsigned Opc,
		unsigned SubRegIdx);
	SDNode SelectLoadLane(SDNode N, unsigned NumVecs, unsigned Opc);	SDNode SelectLoadLane(SDNode N, unsigned NumVecs, unsigned Opc);
		SDNode SelectPostLoadLane(SDNode N, unsigned NumVecs, unsigned Opc);

	SDNode SelectStore(SDNode N, unsigned NumVecs, unsigned Opc);	SDNode SelectStore(SDNode N, unsigned NumVecs, unsigned Opc);
		SDNode SelectPostStore(SDNode N, unsigned NumVecs, unsigned Opc);
	SDNode SelectStoreLane(SDNode N, unsigned NumVecs, unsigned Opc);	SDNode SelectStoreLane(SDNode N, unsigned NumVecs, unsigned Opc);
		SDNode SelectPostStoreLane(SDNode N, unsigned NumVecs, unsigned Opc);

	SDNode SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode Node);	SDNode SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode Node);
	SDNode SelectSIMDXtnNarrowing(unsigned IntNo, SDNode Node);	SDNode SelectSIMDXtnNarrowing(unsigned IntNo, SDNode Node);
Context not available.
	return nullptr;	return nullptr;
	}	}

		/// SelectPostLoad - Select a post-increment load of NumVecs vectors.
		///
		/// Builds machine node \p Opc from N's operand 1 (memory operand),
		/// operand 2 (increment) and the chain (operand 0). The new node produces
		/// three results: the i64 write-back register, an Untyped vector list and
		/// the chain. N's results are rewired onto it: results 0..NumVecs-1 become
		/// sub-register extracts numbered from \p SubRegIdx, result NumVecs becomes
		/// the write-back value and result NumVecs+1 becomes the chain.
		///
		/// Always returns nullptr.
		SDNode ARM64DAGToDAGISel::SelectPostLoad(SDNode N, unsigned NumVecs,
		unsigned Opc, unsigned SubRegIdx) {
		SDLoc dl(N);
		EVT VT = N->getValueType(0);
		SDValue Chain = N->getOperand(0);

		SmallVector<SDValue, 6> Ops;
		Ops.push_back(N->getOperand(1)); // Mem operand
		Ops.push_back(N->getOperand(2)); // Incremental
		Ops.push_back(Chain);

		// Result order of the new node: write-back, vector list, chain.
		std::vector<EVT> ResTys;
		ResTys.push_back(MVT::i64); // Type of the write back register
		ResTys.push_back(MVT::Untyped);
		ResTys.push_back(MVT::Other);

		SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

		// Update uses of write back register
		ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

		// Update uses of vector list
		// The cases fall through on purpose: entering at case K also rewires
		// every lower-numbered result. There is no default, so a NumVecs outside
		// 2..4 leaves the vector results of N unreplaced.
		SDValue SuperReg = SDValue(Ld, 1);
		switch (NumVecs) {
		case 4:
		ReplaceUses(SDValue(N, 3), CurDAG->getTargetExtractSubreg(SubRegIdx + 3, dl,
		VT, SuperReg));
		// FALLTHROUGH
		case 3:
		ReplaceUses(SDValue(N, 2), CurDAG->getTargetExtractSubreg(SubRegIdx + 2, dl,
		VT, SuperReg));
		// FALLTHROUGH
		case 2:
		ReplaceUses(SDValue(N, 1), CurDAG->getTargetExtractSubreg(SubRegIdx + 1, dl,
		VT, SuperReg));
		ReplaceUses(SDValue(N, 0),
		CurDAG->getTargetExtractSubreg(SubRegIdx, dl, VT, SuperReg));
		break;
		}

		// Update the chain
		ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
		return nullptr;
		}

	SDNode ARM64DAGToDAGISel::SelectStore(SDNode N, unsigned NumVecs,	SDNode ARM64DAGToDAGISel::SelectStore(SDNode N, unsigned NumVecs,
	unsigned Opc) {	unsigned Opc) {
	SDLoc dl(N);	SDLoc dl(N);
Context not available.
	return St;	return St;
	}	}

		/// SelectPostStore - Select a post-increment store of NumVecs vectors.
		///
		/// Operands 1..NumVecs of N are the vectors to store; they are combined
		/// with createQTuple when the vector type is 128 bits wide and with
		/// createDTuple otherwise. Machine node \p Opc is then built from that
		/// tuple, operand NumVecs+1 (base register), operand NumVecs+2 (increment)
		/// and the chain (operand 0). It produces the i64 write-back register and
		/// a chain.
		///
		/// Returns the newly created machine node.
		SDNode ARM64DAGToDAGISel::SelectPostStore(SDNode N, unsigned NumVecs,
		unsigned Opc) {
		SDLoc dl(N);
		// The vector type is read from operand 2, which is a vector operand
		// whenever NumVecs >= 2.
		EVT VT = N->getOperand(2)->getValueType(0);
		SmallVector<EVT, 2> ResTys;
		ResTys.push_back(MVT::i64); // Type of the write back register
		ResTys.push_back(MVT::Other); // Type for the Chain

		// Form a REG_SEQUENCE to force register allocation.
		bool Is128Bit = VT.getSizeInBits() == 128;
		SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
		SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

		SmallVector<SDValue, 6> Ops;
		// Ops.push_back(N->getOperand(1)); // write back register
		t.p.northoverUnsubmitted Not Done Reply Inline Actions Commented out code. t.p.northover: Commented out code.
		Ops.push_back(RegSeq);
		Ops.push_back(N->getOperand(NumVecs + 1)); // base register
		Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
		Ops.push_back(N->getOperand(0)); // Chain
		SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

		return St;
		}

	/// WidenVector - Given a value in the V64 register class, produce the	/// WidenVector - Given a value in the V64 register class, produce the
	/// equivalent value in the V128 register class.	/// equivalent value in the V128 register class.
	class WidenVector {	class WidenVector {
Context not available.
	return Ld;	return Ld;
	}	}

		/// SelectPostLoadLane - Select a post-increment load into one lane of
		/// NumVecs vectors.
		///
		/// Operands 1..NumVecs of N are the incoming vectors, operand NumVecs+1 is
		/// the lane number (must be a ConstantSDNode), operand NumVecs+2 the base
		/// register and operand NumVecs+3 the increment. The vectors are always
		/// combined with createQTuple; 64-bit vectors are first passed through
		/// WidenVector and the extracted results are passed through NarrowVector.
		/// Machine node \p Opc produces the i64 write-back register, an Untyped
		/// vector list and the chain, and N's results are rewired onto it.
		///
		/// Returns the newly created machine node.
		SDNode ARM64DAGToDAGISel::SelectPostLoadLane(SDNode N, unsigned NumVecs,
		unsigned Opc) {
		SDLoc dl(N);
		EVT VT = N->getValueType(0);
		// A 64-bit result type means inputs are widened and outputs narrowed.
		bool Narrow = VT.getSizeInBits() == 64;

		// Form a REG_SEQUENCE to force register allocation.
		SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

		if (Narrow)
		std::transform(Regs.begin(), Regs.end(), Regs.begin(),
		WidenVector(*CurDAG));

		SDValue RegSeq = createQTuple(Regs);

		// Result order of the new node: write-back, vector list, chain.
		std::vector<EVT> ResTys;
		ResTys.push_back(MVT::i64); // Type of the write back register
		ResTys.push_back(MVT::Untyped);
		ResTys.push_back(MVT::Other);

		unsigned LaneNo =
		cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

		SmallVector<SDValue, 6> Ops;
		Ops.push_back(RegSeq);
		Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
		Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
		Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
		Ops.push_back(N->getOperand(0));
		SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

		// Update uses of the write back register
		ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

		// Update uses of the vector list
		SDValue SuperReg = SDValue(Ld, 1);
		// Type used for the sub-register extracts, read from operand 1 of the
		// tuple node (presumably the first widened vector - confirm against
		// createQTuple).
		EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
		// The cases fall through on purpose: entering at case K also rewires
		// every lower-numbered result. There is no default, so a NumVecs outside
		// 2..4 leaves the vector results of N unreplaced.
		switch (NumVecs) {
		t.p.northoverUnsubmitted Not Done Reply Inline Actions Wouldn't this be neater as a for loop? static unsigned QSubs[] = { ARM64::qsub0, ARM64::qsub1, ARM64::qsub2, ARM64::qsub3 }; for (int i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubReg(QSubs[i], dl, WideVT, SuperReg); if (Narrow) NV = NarrowVector(NV, CurDAG); ReplaceUses(SDValue(N, i), NV); } (Untested). t.p.northover:* Wouldn't this be neater as a for loop? static unsigned QSubs[] = { ARM64::qsub0, ARM64…
		case 4: {
		SDValue NV3 =
		CurDAG->getTargetExtractSubreg(ARM64::qsub3, dl, WideVT, SuperReg);
		if (Narrow)
		ReplaceUses(SDValue(N, 3), NarrowVector(NV3, *CurDAG));
		else
		ReplaceUses(SDValue(N, 3), NV3);
		}
		// FALLTHROUGH
		case 3: {
		SDValue NV2 =
		CurDAG->getTargetExtractSubreg(ARM64::qsub2, dl, WideVT, SuperReg);
		if (Narrow)
		ReplaceUses(SDValue(N, 2), NarrowVector(NV2, *CurDAG));
		else
		ReplaceUses(SDValue(N, 2), NV2);
		}
		// FALLTHROUGH
		case 2: {
		SDValue NV1 =
		CurDAG->getTargetExtractSubreg(ARM64::qsub1, dl, WideVT, SuperReg);
		SDValue NV0 =
		CurDAG->getTargetExtractSubreg(ARM64::qsub0, dl, WideVT, SuperReg);
		if (Narrow) {
		ReplaceUses(SDValue(N, 1), NarrowVector(NV1, *CurDAG));
		ReplaceUses(SDValue(N, 0), NarrowVector(NV0, *CurDAG));
		} else {
		ReplaceUses(SDValue(N, 1), NV1);
		ReplaceUses(SDValue(N, 0), NV0);
		}
		break;
		}
		}

		// Update the Chain
		ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));

		return Ld;
		}

	SDNode ARM64DAGToDAGISel::SelectStoreLane(SDNode N, unsigned NumVecs,	SDNode ARM64DAGToDAGISel::SelectStoreLane(SDNode N, unsigned NumVecs,
	unsigned Opc) {	unsigned Opc) {
	SDLoc dl(N);	SDLoc dl(N);
Context not available.
	return St;	return St;
	}	}

		/// SelectPostStoreLane - Select a post-increment store of one lane from
		/// NumVecs vectors.
		///
		/// Operands 1..NumVecs of N are the vectors, operand NumVecs+1 is the lane
		/// number (must be a ConstantSDNode), operand NumVecs+2 the base register
		/// and operand NumVecs+3 the increment. The vectors are always combined
		/// with createQTuple; 64-bit vectors are first passed through WidenVector.
		/// Machine node \p Opc produces the i64 write-back register and a chain,
		/// and N's memory operand is copied onto it.
		///
		/// Returns the newly created machine node.
		SDNode ARM64DAGToDAGISel::SelectPostStoreLane(SDNode N, unsigned NumVecs,
		unsigned Opc) {
		SDLoc dl(N);
		// The vector type is read from operand 2, which is a vector operand
		// whenever NumVecs >= 2.
		EVT VT = N->getOperand(2)->getValueType(0);
		bool Narrow = VT.getSizeInBits() == 64;

		// Form a REG_SEQUENCE to force register allocation.
		SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

		if (Narrow)
		std::transform(Regs.begin(), Regs.end(), Regs.begin(),
		WidenVector(*CurDAG));

		SDValue RegSeq = createQTuple(Regs);

		SmallVector<EVT, 2> ResTys;
		ResTys.push_back(MVT::i64); // Type of the write back register
		ResTys.push_back(MVT::Other);

		unsigned LaneNo =
		cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();

		SmallVector<SDValue, 6> Ops;
		Ops.push_back(RegSeq);
		Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
		Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
		Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
		Ops.push_back(N->getOperand(0));
		SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

		// Transfer memoperands.
		// N is cast to MemIntrinsicSDNode here, so it must be one; its single
		// memory operand is copied into a freshly allocated one-entry array.
		MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
		MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
		cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);

		return St;
		}

	static bool isBitfieldExtractOpFromAnd(SelectionDAG CurDAG, SDNode N,	static bool isBitfieldExtractOpFromAnd(SelectionDAG CurDAG, SDNode N,
	unsigned &Opc, SDValue &Opd0,	unsigned &Opc, SDValue &Opd0,
	unsigned &LSB, unsigned &MSB,	unsigned &LSB, unsigned &MSB,
Context not available.
	}	}
	}	}
	}	}
		case ARM64ISD::LD2post: {
		if (VT == MVT::v8i8)
		return SelectPostLoad(Node, 2, ARM64::LD2Twov8b_POST, ARM64::dsub0);
		else if (VT == MVT::v16i8)
		return SelectPostLoad(Node, 2, ARM64::LD2Twov16b_POST, ARM64::qsub0);
		else if (VT == MVT::v4i16)
		return SelectPostLoad(Node, 2, ARM64::LD2Twov4h_POST, ARM64::dsub0);
		else if (VT == MVT::v8i16)
		return SelectPostLoad(Node, 2, ARM64::LD2Twov8h_POST, ARM64::qsub0);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostLoad(Node, 2, ARM64::LD2Twov2s_POST, ARM64::dsub0);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostLoad(Node, 2, ARM64::LD2Twov4s_POST, ARM64::qsub0);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostLoad(Node, 2, ARM64::LD1Twov1d_POST, ARM64::dsub0);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostLoad(Node, 2, ARM64::LD2Twov2d_POST, ARM64::qsub0);
		break;
		}
		case ARM64ISD::LD3post: {
		if (VT == MVT::v8i8)
		return SelectPostLoad(Node, 3, ARM64::LD3Threev8b_POST, ARM64::dsub0);
		else if (VT == MVT::v16i8)
		return SelectPostLoad(Node, 3, ARM64::LD3Threev16b_POST, ARM64::qsub0);
		else if (VT == MVT::v4i16)
		return SelectPostLoad(Node, 3, ARM64::LD3Threev4h_POST, ARM64::dsub0);
		else if (VT == MVT::v8i16)
		return SelectPostLoad(Node, 3, ARM64::LD3Threev8h_POST, ARM64::qsub0);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostLoad(Node, 3, ARM64::LD3Threev2s_POST, ARM64::dsub0);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostLoad(Node, 3, ARM64::LD3Threev4s_POST, ARM64::qsub0);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostLoad(Node, 3, ARM64::LD1Threev1d_POST, ARM64::dsub0);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostLoad(Node, 3, ARM64::LD3Threev2d_POST, ARM64::qsub0);
		break;
		}
		case ARM64ISD::LD4post: {
		if (VT == MVT::v8i8)
		return SelectPostLoad(Node, 4, ARM64::LD4Fourv8b_POST, ARM64::dsub0);
		else if (VT == MVT::v16i8)
		return SelectPostLoad(Node, 4, ARM64::LD4Fourv16b_POST, ARM64::qsub0);
		else if (VT == MVT::v4i16)
		return SelectPostLoad(Node, 4, ARM64::LD4Fourv4h_POST, ARM64::dsub0);
		else if (VT == MVT::v8i16)
		return SelectPostLoad(Node, 4, ARM64::LD4Fourv8h_POST, ARM64::qsub0);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostLoad(Node, 4, ARM64::LD4Fourv2s_POST, ARM64::dsub0);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostLoad(Node, 4, ARM64::LD4Fourv4s_POST, ARM64::qsub0);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostLoad(Node, 4, ARM64::LD1Fourv1d_POST, ARM64::dsub0);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostLoad(Node, 4, ARM64::LD4Fourv2d_POST, ARM64::qsub0);
		break;
		}
		case ARM64ISD::LD1x2post: {
		if (VT == MVT::v8i8)
		return SelectPostLoad(Node, 2, ARM64::LD1Twov8b_POST, ARM64::dsub0);
		else if (VT == MVT::v16i8)
		return SelectPostLoad(Node, 2, ARM64::LD1Twov16b_POST, ARM64::qsub0);
		else if (VT == MVT::v4i16)
		return SelectPostLoad(Node, 2, ARM64::LD1Twov4h_POST, ARM64::dsub0);
		else if (VT == MVT::v8i16)
		return SelectPostLoad(Node, 2, ARM64::LD1Twov8h_POST, ARM64::qsub0);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostLoad(Node, 2, ARM64::LD1Twov2s_POST, ARM64::dsub0);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostLoad(Node, 2, ARM64::LD1Twov4s_POST, ARM64::qsub0);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostLoad(Node, 2, ARM64::LD1Twov1d_POST, ARM64::dsub0);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostLoad(Node, 2, ARM64::LD1Twov2d_POST, ARM64::qsub0);
		break;
		}
		case ARM64ISD::LD1x3post: {
		if (VT == MVT::v8i8)
		return SelectPostLoad(Node, 3, ARM64::LD1Threev8b_POST, ARM64::dsub0);
		else if (VT == MVT::v16i8)
		return SelectPostLoad(Node, 3, ARM64::LD1Threev16b_POST, ARM64::qsub0);
		else if (VT == MVT::v4i16)
		return SelectPostLoad(Node, 3, ARM64::LD1Threev4h_POST, ARM64::dsub0);
		else if (VT == MVT::v8i16)
		return SelectPostLoad(Node, 3, ARM64::LD1Threev8h_POST, ARM64::qsub0);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostLoad(Node, 3, ARM64::LD1Threev2s_POST, ARM64::dsub0);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostLoad(Node, 3, ARM64::LD1Threev4s_POST, ARM64::qsub0);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostLoad(Node, 3, ARM64::LD1Threev1d_POST, ARM64::dsub0);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostLoad(Node, 3, ARM64::LD1Threev2d_POST, ARM64::qsub0);
		break;
		}
		case ARM64ISD::LD1x4post: {
		if (VT == MVT::v8i8)
		return SelectPostLoad(Node, 4, ARM64::LD1Fourv8b_POST, ARM64::dsub0);
		else if (VT == MVT::v16i8)
		return SelectPostLoad(Node, 4, ARM64::LD1Fourv16b_POST, ARM64::qsub0);
		else if (VT == MVT::v4i16)
		return SelectPostLoad(Node, 4, ARM64::LD1Fourv4h_POST, ARM64::dsub0);
		else if (VT == MVT::v8i16)
		return SelectPostLoad(Node, 4, ARM64::LD1Fourv8h_POST, ARM64::qsub0);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostLoad(Node, 4, ARM64::LD1Fourv2s_POST, ARM64::dsub0);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostLoad(Node, 4, ARM64::LD1Fourv4s_POST, ARM64::qsub0);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostLoad(Node, 4, ARM64::LD1Fourv1d_POST, ARM64::dsub0);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostLoad(Node, 4, ARM64::LD1Fourv2d_POST, ARM64::qsub0);
		break;
		}
		case ARM64ISD::LD2DUPpost: {
		if (VT == MVT::v8i8)
		return SelectPostLoad(Node, 2, ARM64::LD2Rv8b_POST, ARM64::dsub0);
		else if (VT == MVT::v16i8)
		return SelectPostLoad(Node, 2, ARM64::LD2Rv16b_POST, ARM64::qsub0);
		else if (VT == MVT::v4i16)
		return SelectPostLoad(Node, 2, ARM64::LD2Rv4h_POST, ARM64::dsub0);
		else if (VT == MVT::v8i16)
		return SelectPostLoad(Node, 2, ARM64::LD2Rv8h_POST, ARM64::qsub0);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostLoad(Node, 2, ARM64::LD2Rv2s_POST, ARM64::dsub0);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostLoad(Node, 2, ARM64::LD2Rv4s_POST, ARM64::qsub0);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostLoad(Node, 2, ARM64::LD2Rv1d_POST, ARM64::dsub0);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostLoad(Node, 2, ARM64::LD2Rv2d_POST, ARM64::qsub0);
		break;
		}
		case ARM64ISD::LD3DUPpost: {
		if (VT == MVT::v8i8)
		return SelectPostLoad(Node, 3, ARM64::LD3Rv8b_POST, ARM64::dsub0);
		else if (VT == MVT::v16i8)
		return SelectPostLoad(Node, 3, ARM64::LD3Rv16b_POST, ARM64::qsub0);
		else if (VT == MVT::v4i16)
		return SelectPostLoad(Node, 3, ARM64::LD3Rv4h_POST, ARM64::dsub0);
		else if (VT == MVT::v8i16)
		return SelectPostLoad(Node, 3, ARM64::LD3Rv8h_POST, ARM64::qsub0);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostLoad(Node, 3, ARM64::LD3Rv2s_POST, ARM64::dsub0);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostLoad(Node, 3, ARM64::LD3Rv4s_POST, ARM64::qsub0);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostLoad(Node, 3, ARM64::LD3Rv1d_POST, ARM64::dsub0);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostLoad(Node, 3, ARM64::LD3Rv2d_POST, ARM64::qsub0);
		break;
		}
		case ARM64ISD::LD4DUPpost: {
		if (VT == MVT::v8i8)
		return SelectPostLoad(Node, 4, ARM64::LD4Rv8b_POST, ARM64::dsub0);
		else if (VT == MVT::v16i8)
		return SelectPostLoad(Node, 4, ARM64::LD4Rv16b_POST, ARM64::qsub0);
		else if (VT == MVT::v4i16)
		return SelectPostLoad(Node, 4, ARM64::LD4Rv4h_POST, ARM64::dsub0);
		else if (VT == MVT::v8i16)
		return SelectPostLoad(Node, 4, ARM64::LD4Rv8h_POST, ARM64::qsub0);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostLoad(Node, 4, ARM64::LD4Rv2s_POST, ARM64::dsub0);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostLoad(Node, 4, ARM64::LD4Rv4s_POST, ARM64::qsub0);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostLoad(Node, 4, ARM64::LD4Rv1d_POST, ARM64::dsub0);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostLoad(Node, 4, ARM64::LD4Rv2d_POST, ARM64::qsub0);
		break;
		}
		case ARM64ISD::LD2LANEpost: {
		if (VT == MVT::v16i8 \|\| VT == MVT::v8i8)
		return SelectPostLoadLane(Node, 2, ARM64::LD2i8_POST);
		else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16)
		return SelectPostLoadLane(Node, 2, ARM64::LD2i16_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
		VT == MVT::v2f32)
		return SelectPostLoadLane(Node, 2, ARM64::LD2i32_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
		VT == MVT::v1f64)
		return SelectPostLoadLane(Node, 2, ARM64::LD2i64_POST);
		break;
		}
		case ARM64ISD::LD3LANEpost: {
		if (VT == MVT::v16i8 \|\| VT == MVT::v8i8)
		return SelectPostLoadLane(Node, 3, ARM64::LD3i8_POST);
		else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16)
		return SelectPostLoadLane(Node, 3, ARM64::LD3i16_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
		VT == MVT::v2f32)
		return SelectPostLoadLane(Node, 3, ARM64::LD3i32_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
		VT == MVT::v1f64)
		return SelectPostLoadLane(Node, 3, ARM64::LD3i64_POST);
		break;
		}
		case ARM64ISD::LD4LANEpost: {
		if (VT == MVT::v16i8 \|\| VT == MVT::v8i8)
		return SelectPostLoadLane(Node, 4, ARM64::LD4i8_POST);
		else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16)
		return SelectPostLoadLane(Node, 4, ARM64::LD4i16_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
		VT == MVT::v2f32)
		return SelectPostLoadLane(Node, 4, ARM64::LD4i32_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
		VT == MVT::v1f64)
		return SelectPostLoadLane(Node, 4, ARM64::LD4i64_POST);
		break;
		}
		case ARM64ISD::ST2post: {
		VT = Node->getOperand(1).getValueType();
		if (VT == MVT::v8i8)
		return SelectPostStore(Node, 2, ARM64::ST2Twov8b_POST);
		else if (VT == MVT::v16i8)
		return SelectPostStore(Node, 2, ARM64::ST2Twov16b_POST);
		else if (VT == MVT::v4i16)
		return SelectPostStore(Node, 2, ARM64::ST2Twov4h_POST);
		else if (VT == MVT::v8i16)
		return SelectPostStore(Node, 2, ARM64::ST2Twov8h_POST);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostStore(Node, 2, ARM64::ST2Twov2s_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostStore(Node, 2, ARM64::ST2Twov4s_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostStore(Node, 2, ARM64::ST2Twov2d_POST);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostStore(Node, 2, ARM64::ST1Twov1d_POST);
		break;
		}
		case ARM64ISD::ST3post: {
		VT = Node->getOperand(1).getValueType();
		if (VT == MVT::v8i8)
		return SelectPostStore(Node, 3, ARM64::ST3Threev8b_POST);
		else if (VT == MVT::v16i8)
		return SelectPostStore(Node, 3, ARM64::ST3Threev16b_POST);
		else if (VT == MVT::v4i16)
		return SelectPostStore(Node, 3, ARM64::ST3Threev4h_POST);
		else if (VT == MVT::v8i16)
		return SelectPostStore(Node, 3, ARM64::ST3Threev8h_POST);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostStore(Node, 3, ARM64::ST3Threev2s_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostStore(Node, 3, ARM64::ST3Threev4s_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostStore(Node, 3, ARM64::ST3Threev2d_POST);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostStore(Node, 3, ARM64::ST1Threev1d_POST);
		break;
		}
		case ARM64ISD::ST4post: {
		VT = Node->getOperand(1).getValueType();
		if (VT == MVT::v8i8)
		return SelectPostStore(Node, 4, ARM64::ST4Fourv8b_POST);
		else if (VT == MVT::v16i8)
		return SelectPostStore(Node, 4, ARM64::ST4Fourv16b_POST);
		else if (VT == MVT::v4i16)
		return SelectPostStore(Node, 4, ARM64::ST4Fourv4h_POST);
		else if (VT == MVT::v8i16)
		return SelectPostStore(Node, 4, ARM64::ST4Fourv8h_POST);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostStore(Node, 4, ARM64::ST4Fourv2s_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostStore(Node, 4, ARM64::ST4Fourv4s_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostStore(Node, 4, ARM64::ST4Fourv2d_POST);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostStore(Node, 4, ARM64::ST1Fourv1d_POST);
		break;
		}
		case ARM64ISD::ST1x2post: {
		VT = Node->getOperand(1).getValueType();
		if (VT == MVT::v8i8)
		return SelectPostStore(Node, 2, ARM64::ST1Twov8b_POST);
		else if (VT == MVT::v16i8)
		return SelectPostStore(Node, 2, ARM64::ST1Twov16b_POST);
		else if (VT == MVT::v4i16)
		return SelectPostStore(Node, 2, ARM64::ST1Twov4h_POST);
		else if (VT == MVT::v8i16)
		return SelectPostStore(Node, 2, ARM64::ST1Twov8h_POST);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostStore(Node, 2, ARM64::ST1Twov2s_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostStore(Node, 2, ARM64::ST1Twov4s_POST);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostStore(Node, 2, ARM64::ST1Twov1d_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostStore(Node, 2, ARM64::ST1Twov2d_POST);
		break;
		}
		case ARM64ISD::ST1x3post: {
		VT = Node->getOperand(1).getValueType();
		if (VT == MVT::v8i8)
		return SelectPostStore(Node, 3, ARM64::ST1Threev8b_POST);
		else if (VT == MVT::v16i8)
		return SelectPostStore(Node, 3, ARM64::ST1Threev16b_POST);
		else if (VT == MVT::v4i16)
		return SelectPostStore(Node, 3, ARM64::ST1Threev4h_POST);
		else if (VT == MVT::v8i16)
		return SelectPostStore(Node, 3, ARM64::ST1Threev8h_POST);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostStore(Node, 3, ARM64::ST1Threev2s_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostStore(Node, 3, ARM64::ST1Threev4s_POST);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostStore(Node, 3, ARM64::ST1Threev1d_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostStore(Node, 3, ARM64::ST1Threev2d_POST);
		break;
		}
		case ARM64ISD::ST1x4post: {
		VT = Node->getOperand(1).getValueType();
		if (VT == MVT::v8i8)
		return SelectPostStore(Node, 4, ARM64::ST1Fourv8b_POST);
		else if (VT == MVT::v16i8)
		return SelectPostStore(Node, 4, ARM64::ST1Fourv16b_POST);
		else if (VT == MVT::v4i16)
		return SelectPostStore(Node, 4, ARM64::ST1Fourv4h_POST);
		else if (VT == MVT::v8i16)
		return SelectPostStore(Node, 4, ARM64::ST1Fourv8h_POST);
		else if (VT == MVT::v2i32 \|\| VT == MVT::v2f32)
		return SelectPostStore(Node, 4, ARM64::ST1Fourv2s_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v4f32)
		return SelectPostStore(Node, 4, ARM64::ST1Fourv4s_POST);
		else if (VT == MVT::v1i64 \|\| VT == MVT::v1f64)
		return SelectPostStore(Node, 4, ARM64::ST1Fourv1d_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v2f64)
		return SelectPostStore(Node, 4, ARM64::ST1Fourv2d_POST);
		break;
		}
		case ARM64ISD::ST2LANEpost: {
		VT = Node->getOperand(1).getValueType();
		if (VT == MVT::v16i8 \|\| VT == MVT::v8i8)
		return SelectPostStoreLane(Node, 2, ARM64::ST2i8_POST);
		else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16)
		return SelectPostStoreLane(Node, 2, ARM64::ST2i16_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
		VT == MVT::v2f32)
		return SelectPostStoreLane(Node, 2, ARM64::ST2i32_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
		VT == MVT::v1f64)
		return SelectPostStoreLane(Node, 2, ARM64::ST2i64_POST);
		break;
		}
		case ARM64ISD::ST3LANEpost: {
		VT = Node->getOperand(1).getValueType();
		if (VT == MVT::v16i8 \|\| VT == MVT::v8i8)
		return SelectPostStoreLane(Node, 3, ARM64::ST3i8_POST);
		else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16)
		return SelectPostStoreLane(Node, 3, ARM64::ST3i16_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
		VT == MVT::v2f32)
		return SelectPostStoreLane(Node, 3, ARM64::ST3i32_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
		VT == MVT::v1f64)
		return SelectPostStoreLane(Node, 3, ARM64::ST3i64_POST);
		break;
		}
		case ARM64ISD::ST4LANEpost: {
		VT = Node->getOperand(1).getValueType();
		if (VT == MVT::v16i8 \|\| VT == MVT::v8i8)
		return SelectPostStoreLane(Node, 4, ARM64::ST4i8_POST);
		else if (VT == MVT::v8i16 \|\| VT == MVT::v4i16)
		return SelectPostStoreLane(Node, 4, ARM64::ST4i16_POST);
		else if (VT == MVT::v4i32 \|\| VT == MVT::v2i32 \|\| VT == MVT::v4f32 \|\|
		VT == MVT::v2f32)
		return SelectPostStoreLane(Node, 4, ARM64::ST4i32_POST);
		else if (VT == MVT::v2i64 \|\| VT == MVT::v1i64 \|\| VT == MVT::v2f64 \|\|
		VT == MVT::v1f64)
		return SelectPostStoreLane(Node, 4, ARM64::ST4i64_POST);
		break;
		}

	case ISD::FCEIL:	case ISD::FCEIL:
	case ISD::FFLOOR:	case ISD::FFLOOR:
Context not available.

lib/Target/ARM64/ARM64ISelLowering.h

Context not available.

	// {s\|u}int to FP within a FP register.	// {s\|u}int to FP within a FP register.
	SITOF,	SITOF,
	UITOF	UITOF,

		// NEON Load/Store with post-increment base updates
		LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
		LD3post,
		LD4post,
		ST2post,
		ST3post,
		ST4post,
		LD1x2post,
		LD1x3post,
		LD1x4post,
		ST1x2post,
		ST1x3post,
		ST1x4post,
		LD2DUPpost,
		LD3DUPpost,
		LD4DUPpost,
		LD2LANEpost,
		LD3LANEpost,
		LD4LANEpost,
		ST2LANEpost,
		ST3LANEpost,
		ST4LANEpost
	};	};

	} // end namespace ARM64ISD	} // end namespace ARM64ISD
Context not available.

lib/Target/ARM64/ARM64ISelLowering.cpp

Context not available.

	setTargetDAGCombine(ISD::VSELECT);	setTargetDAGCombine(ISD::VSELECT);

		setTargetDAGCombine(ISD::INTRINSIC_VOID);
		setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);

	MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;	MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
	MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;	MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
	MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;	MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
Context not available.
	case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I";	case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I";
	case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I";	case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I";
	case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge";	case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge";
		case ARM64ISD::LD2post: return "ARM64ISD::LD2post";
		case ARM64ISD::LD3post: return "ARM64ISD::LD3post";
		case ARM64ISD::LD4post: return "ARM64ISD::LD4post";
		case ARM64ISD::ST2post: return "ARM64ISD::ST2post";
		case ARM64ISD::ST3post: return "ARM64ISD::ST3post";
		case ARM64ISD::ST4post: return "ARM64ISD::ST4post";
		case ARM64ISD::LD1x2post: return "ARM64ISD::LD1x2post";
		case ARM64ISD::LD1x3post: return "ARM64ISD::LD1x3post";
		case ARM64ISD::LD1x4post: return "ARM64ISD::LD1x4post";
		case ARM64ISD::ST1x2post: return "ARM64ISD::ST1x2post";
		case ARM64ISD::ST1x3post: return "ARM64ISD::ST1x3post";
		case ARM64ISD::ST1x4post: return "ARM64ISD::ST1x4post";
		case ARM64ISD::LD2DUPpost: return "ARM64ISD::LD2DUPpost";
		case ARM64ISD::LD3DUPpost: return "ARM64ISD::LD3DUPpost";
		case ARM64ISD::LD4DUPpost: return "ARM64ISD::LD4DUPpost";
		case ARM64ISD::LD2LANEpost: return "ARM64ISD::LD2LANEpost";
		case ARM64ISD::LD3LANEpost: return "ARM64ISD::LD3LANEpost";
		case ARM64ISD::LD4LANEpost: return "ARM64ISD::LD4LANEpost";
		case ARM64ISD::ST2LANEpost: return "ARM64ISD::ST2LANEpost";
		case ARM64ISD::ST3LANEpost: return "ARM64ISD::ST3LANEpost";
		case ARM64ISD::ST4LANEpost: return "ARM64ISD::ST4LANEpost";
	}	}
	}	}

Context not available.
	case Intrinsic::arm64_neon_ld2:	case Intrinsic::arm64_neon_ld2:
	case Intrinsic::arm64_neon_ld3:	case Intrinsic::arm64_neon_ld3:
	case Intrinsic::arm64_neon_ld4:	case Intrinsic::arm64_neon_ld4:
		case Intrinsic::arm64_neon_ld1x2:
		case Intrinsic::arm64_neon_ld1x3:
		case Intrinsic::arm64_neon_ld1x4:
	case Intrinsic::arm64_neon_ld2lane:	case Intrinsic::arm64_neon_ld2lane:
	case Intrinsic::arm64_neon_ld3lane:	case Intrinsic::arm64_neon_ld3lane:
	case Intrinsic::arm64_neon_ld4lane:	case Intrinsic::arm64_neon_ld4lane:
Context not available.
	case Intrinsic::arm64_neon_st2:	case Intrinsic::arm64_neon_st2:
	case Intrinsic::arm64_neon_st3:	case Intrinsic::arm64_neon_st3:
	case Intrinsic::arm64_neon_st4:	case Intrinsic::arm64_neon_st4:
		case Intrinsic::arm64_neon_st1x2:
		case Intrinsic::arm64_neon_st1x3:
		case Intrinsic::arm64_neon_st1x4:
	case Intrinsic::arm64_neon_st2lane:	case Intrinsic::arm64_neon_st2lane:
	case Intrinsic::arm64_neon_st3lane:	case Intrinsic::arm64_neon_st3lane:
	case Intrinsic::arm64_neon_st4lane: {	case Intrinsic::arm64_neon_st4lane: {
Context not available.
	S->getAlignment());	S->getAlignment());
	}	}

		/// Target-specific DAG combine for NEON structured load/store intrinsics
		/// (ld2/ld3/ld4, ld1x2/x3/x4, ldNr, ldNlane and the matching stores) that
		/// merges a separate base-address increment into the memory operation.
		///
		/// The address is taken to be the last operand of \p N. If some ISD::ADD
		/// also uses that address and is independent of \p N, the pair is replaced
		/// by a single ARM64ISD::*post memory-intrinsic node whose results are:
		///   [0 .. NumResultVecs)  the loaded vectors (none for stores),
		///   [NumResultVecs]       the written-back address (i64),
		///   [NumResultVecs + 1]   the chain.
		///
		/// \param N   INTRINSIC_VOID / INTRINSIC_W_CHAIN node; operand 1 is the
		///            intrinsic ID.
		/// \param DCI combiner state, used to replace the uses of \p N and the ADD.
		/// \param DAG DAG in which the new node is created.
		/// \returns always an empty SDValue; all replacements are performed through
		///          DCI.CombineTo.
		///
		/// NOTE(review, t.p.northover): functions usually start with a lower-case
		/// letter, and indentation was a bit wonky here. The name is kept unchanged
		/// so the existing call site still resolves.
		static SDValue CombineNEONPostLoadStore(SDNode *N,
		                                        TargetLowering::DAGCombinerInfo &DCI,
		                                        SelectionDAG &DAG) {
		  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
		    return SDValue();

		  unsigned AddrOpIdx = N->getNumOperands() - 1;
		  SDValue Addr = N->getOperand(AddrOpIdx);

		  // Search for a use of the address operand that is an increment.
		  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
		                            UE = Addr.getNode()->use_end();
		       UI != UE; ++UI) {
		    SDNode *User = *UI;
		    if (User->getOpcode() != ISD::ADD ||
		        UI.getUse().getResNo() != Addr.getResNo())
		      continue;

		    // Check that the add is independent of the load/store. Otherwise, folding
		    // it would create a cycle.
		    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
		      continue;

		    // Find the new opcode for the updating load/store.
		    bool IsStore = false;
		    bool IsLaneOp = false;
		    bool IsDupOp = false;
		    unsigned NewOpc = 0;
		    unsigned NumVecs = 0;
		    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
		    switch (IntNo) {
		    default:
		      llvm_unreachable("unexpected intrinsic for Neon base update");
		    case Intrinsic::arm64_neon_ld2:
		      NewOpc = ARM64ISD::LD2post; NumVecs = 2; break;
		    case Intrinsic::arm64_neon_ld3:
		      NewOpc = ARM64ISD::LD3post; NumVecs = 3; break;
		    case Intrinsic::arm64_neon_ld4:
		      NewOpc = ARM64ISD::LD4post; NumVecs = 4; break;
		    case Intrinsic::arm64_neon_st2:
		      NewOpc = ARM64ISD::ST2post; NumVecs = 2; IsStore = true; break;
		    case Intrinsic::arm64_neon_st3:
		      NewOpc = ARM64ISD::ST3post; NumVecs = 3; IsStore = true; break;
		    case Intrinsic::arm64_neon_st4:
		      NewOpc = ARM64ISD::ST4post; NumVecs = 4; IsStore = true; break;
		    case Intrinsic::arm64_neon_ld1x2:
		      NewOpc = ARM64ISD::LD1x2post; NumVecs = 2; break;
		    case Intrinsic::arm64_neon_ld1x3:
		      NewOpc = ARM64ISD::LD1x3post; NumVecs = 3; break;
		    case Intrinsic::arm64_neon_ld1x4:
		      NewOpc = ARM64ISD::LD1x4post; NumVecs = 4; break;
		    case Intrinsic::arm64_neon_st1x2:
		      NewOpc = ARM64ISD::ST1x2post; NumVecs = 2; IsStore = true; break;
		    case Intrinsic::arm64_neon_st1x3:
		      NewOpc = ARM64ISD::ST1x3post; NumVecs = 3; IsStore = true; break;
		    case Intrinsic::arm64_neon_st1x4:
		      NewOpc = ARM64ISD::ST1x4post; NumVecs = 4; IsStore = true; break;
		    case Intrinsic::arm64_neon_ld2r:
		      NewOpc = ARM64ISD::LD2DUPpost; NumVecs = 2; IsDupOp = true; break;
		    case Intrinsic::arm64_neon_ld3r:
		      NewOpc = ARM64ISD::LD3DUPpost; NumVecs = 3; IsDupOp = true; break;
		    case Intrinsic::arm64_neon_ld4r:
		      NewOpc = ARM64ISD::LD4DUPpost; NumVecs = 4; IsDupOp = true; break;
		    case Intrinsic::arm64_neon_ld2lane:
		      NewOpc = ARM64ISD::LD2LANEpost; NumVecs = 2; IsLaneOp = true; break;
		    case Intrinsic::arm64_neon_ld3lane:
		      NewOpc = ARM64ISD::LD3LANEpost; NumVecs = 3; IsLaneOp = true; break;
		    case Intrinsic::arm64_neon_ld4lane:
		      NewOpc = ARM64ISD::LD4LANEpost; NumVecs = 4; IsLaneOp = true; break;
		    case Intrinsic::arm64_neon_st2lane:
		      NewOpc = ARM64ISD::ST2LANEpost; NumVecs = 2;
		      IsStore = true; IsLaneOp = true; break;
		    case Intrinsic::arm64_neon_st3lane:
		      NewOpc = ARM64ISD::ST3LANEpost; NumVecs = 3;
		      IsStore = true; IsLaneOp = true; break;
		    case Intrinsic::arm64_neon_st4lane:
		      NewOpc = ARM64ISD::ST4LANEpost; NumVecs = 4;
		      IsStore = true; IsLaneOp = true; break;
		    }

		    // Stores carry the vector type on their first data operand; loads carry
		    // it on their first result.
		    EVT VecTy;
		    if (IsStore)
		      VecTy = N->getOperand(2).getValueType();
		    else
		      VecTy = N->getValueType(0);

		    // If the increment is a constant, it must match the memory ref size.
		    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
		    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
		      // Keep the full 64-bit value: narrowing to 32 bits would let a constant
		      // such as 2^32 + NumBytes be mistaken for the immediate form.
		      uint64_t IncVal = CInc->getZExtValue();
		      unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
		      // Lane and dup operations touch one element per vector only.
		      if (IsLaneOp || IsDupOp)
		        NumBytes /= VecTy.getVectorNumElements();
		      if (IncVal != NumBytes)
		        continue;
		      // A matching constant is replaced by XZR as the increment operand
		      // (presumably the marker for the immediate post-index form; confirm
		      // against the instruction selection code).
		      Inc = DAG.getRegister(ARM64::XZR, MVT::i64);
		    }

		    SmallVector<SDValue, 8> Ops;
		    Ops.push_back(N->getOperand(0)); // Incoming chain
		    // Load lane and store have vector list as input.
		    if (IsLaneOp || IsStore)
		      for (unsigned i = 2; i < AddrOpIdx; ++i)
		        Ops.push_back(N->getOperand(i));
		    Ops.push_back(N->getOperand(AddrOpIdx)); // Base register
		    Ops.push_back(Inc);

		    // Return Types: at most 4 vectors + write-back + chain.
		    EVT Tys[6];
		    unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
		    unsigned n;
		    for (n = 0; n < NumResultVecs; ++n)
		      Tys[n] = VecTy;
		    Tys[n++] = MVT::i64; // Type of write back register
		    Tys[n] = MVT::Other; // Type of the chain
		    SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs + 2));

		    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
		    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
		                                           MemInt->getMemoryVT(),
		                                           MemInt->getMemOperand());

		    // Update the uses: N's vector results and chain map onto the new node
		    // (skipping the write-back result), and the ADD becomes the write-back.
		    std::vector<SDValue> NewResults;
		    for (unsigned i = 0; i < NumResultVecs; ++i) {
		      NewResults.push_back(SDValue(UpdN.getNode(), i));
		    }
		    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
		    DCI.CombineTo(N, NewResults);
		    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));

		    break;
		  }
		  return SDValue();
		}

	// Optimize compare with zero and branch.	// Optimize compare with zero and branch.
	static SDValue performBRCONDCombine(SDNode *N,	static SDValue performBRCONDCombine(SDNode *N,
	TargetLowering::DAGCombinerInfo &DCI,	TargetLowering::DAGCombinerInfo &DCI,
Context not available.
	return performSTORECombine(N, DCI, DAG, Subtarget);	return performSTORECombine(N, DCI, DAG, Subtarget);
	case ARM64ISD::BRCOND:	case ARM64ISD::BRCOND:
	return performBRCONDCombine(N, DCI, DAG);	return performBRCONDCombine(N, DCI, DAG);
		case ISD::INTRINSIC_VOID:
		case ISD::INTRINSIC_W_CHAIN:
		switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
		case Intrinsic::arm64_neon_ld2:
		case Intrinsic::arm64_neon_ld3:
		case Intrinsic::arm64_neon_ld4:
		case Intrinsic::arm64_neon_ld1x2:
		case Intrinsic::arm64_neon_ld1x3:
		case Intrinsic::arm64_neon_ld1x4:
		case Intrinsic::arm64_neon_ld2lane:
		case Intrinsic::arm64_neon_ld3lane:
		case Intrinsic::arm64_neon_ld4lane:
		case Intrinsic::arm64_neon_ld2r:
		case Intrinsic::arm64_neon_ld3r:
		case Intrinsic::arm64_neon_ld4r:
		case Intrinsic::arm64_neon_st2:
		case Intrinsic::arm64_neon_st3:
		case Intrinsic::arm64_neon_st4:
		case Intrinsic::arm64_neon_st1x2:
		case Intrinsic::arm64_neon_st1x3:
		case Intrinsic::arm64_neon_st1x4:
		case Intrinsic::arm64_neon_st2lane:
		case Intrinsic::arm64_neon_st3lane:
		case Intrinsic::arm64_neon_st4lane:
		return CombineNEONPostLoadStore(N, DCI, DAG);
		default:
		break;
		}
	}	}
	return SDValue();	return SDValue();
	}	}
Context not available.

test/CodeGen/ARM64/indexed-vector-ldst.ll

Context not available.
	%newaddr = getelementptr float* %addr, i32 2	%newaddr = getelementptr float* %addr, i32 2
	ret float* %newaddr	ret float* %newaddr
	}	}

		define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v16i8_post_imm_ld2:
		;CHECK: ld2.16b { v0, v1 }, [x0], #32
		%ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 32
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8> } %ld2
		}

		define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v16i8_post_reg_ld2:
		;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8> } %ld2
		}

		declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2.v16i8.p0i8(i8*)


		define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v8i8_post_imm_ld2:
		;CHECK: ld2.8b { v0, v1 }, [x0], #16
		%ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 16
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8> } %ld2
		}

		define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i8_post_reg_ld2:
		;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8> } %ld2
		}

		declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2.v8i8.p0i8(i8*)


		define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v8i16_post_imm_ld2:
		;CHECK: ld2.8h { v0, v1 }, [x0], #32
		%ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 16
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16> } %ld2
		}

		define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i16_post_reg_ld2:
		;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16> } %ld2
		}

		declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2.v8i16.p0i16(i16*)


		define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v4i16_post_imm_ld2:
		;CHECK: ld2.4h { v0, v1 }, [x0], #16
		%ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 8
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16> } %ld2
		}

		define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i16_post_reg_ld2:
		;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16> } %ld2
		}

		declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2.v4i16.p0i16(i16*)


		define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v4i32_post_imm_ld2:
		;CHECK: ld2.4s { v0, v1 }, [x0], #32
		%ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 8
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32> } %ld2
		}

		define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i32_post_reg_ld2:
		;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32> } %ld2
		}

		declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2.v4i32.p0i32(i32*)


		define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v2i32_post_imm_ld2:
		;CHECK: ld2.2s { v0, v1 }, [x0], #16
		%ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 4
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32> } %ld2
		}

		define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i32_post_reg_ld2:
		;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32> } %ld2
		}

		declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2.v2i32.p0i32(i32*)


		define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v2i64_post_imm_ld2:
		;CHECK: ld2.2d { v0, v1 }, [x0], #32
		%ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 4
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64> } %ld2
		}

		define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i64_post_reg_ld2:
		;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64> } %ld2
		}

		declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2.v2i64.p0i64(i64*)


		define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v1i64_post_imm_ld2:
		;CHECK: ld1.1d { v0, v1 }, [x0], #16
		%ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 2
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64> } %ld2
		}

		define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1i64_post_reg_ld2:
		;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64> } %ld2
		}

		declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2.v1i64.p0i64(i64*)


		define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v4f32_post_imm_ld2:
		;CHECK: ld2.4s { v0, v1 }, [x0], #32
		%ld2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 8
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float> } %ld2
		}

		define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4f32_post_reg_ld2:
		;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float> } %ld2
		}

		declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2.v4f32.p0f32(float*)


		define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v2f32_post_imm_ld2:
		;CHECK: ld2.2s { v0, v1 }, [x0], #16
		%ld2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 4
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float> } %ld2
		}

		define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f32_post_reg_ld2:
		;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float> } %ld2
		}

		declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2.v2f32.p0f32(float*)


		define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v2f64_post_imm_ld2:
		;CHECK: ld2.2d { v0, v1 }, [x0], #32
		%ld2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 4
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double> } %ld2
		}

		define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f64_post_reg_ld2:
		;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double> } %ld2
		}

		declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2.v2f64.p0f64(double*)


		define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v1f64_post_imm_ld2:
		;CHECK: ld1.1d { v0, v1 }, [x0], #16
		%ld2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 2
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double> } %ld2
		}

		define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1f64_post_reg_ld2:
		;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double> } %ld2
		}

		declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2.v1f64.p0f64(double*)


		define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v16i8_post_imm_ld3:
		;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
		%ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 48
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
		}

		define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v16i8_post_reg_ld3:
		;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
		}

		declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3.v16i8.p0i8(i8*)


		define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v8i8_post_imm_ld3:
		;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
		%ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 24
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
		}

		define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i8_post_reg_ld3:
		;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
		}

		declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3.v8i8.p0i8(i8*)


		define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v8i16_post_imm_ld3:
		;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
		%ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 24
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
		}

		define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i16_post_reg_ld3:
		;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
		}

		declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3.v8i16.p0i16(i16*)


		define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v4i16_post_imm_ld3:
		;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
		%ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 12
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
		}

		define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i16_post_reg_ld3:
		;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
		}

		declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3.v4i16.p0i16(i16*)


		define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v4i32_post_imm_ld3:
		;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
		%ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 12
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
		}

		define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i32_post_reg_ld3:
		;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
		}

		declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3.v4i32.p0i32(i32*)


		define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v2i32_post_imm_ld3:
		;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
		%ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 6
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
		}

		define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i32_post_reg_ld3:
		;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
		}

		declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3.v2i32.p0i32(i32*)


		define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v2i64_post_imm_ld3:
		;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
		%ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 6
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
		}

		define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i64_post_reg_ld3:
		;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
		}

		declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3.v2i64.p0i64(i64*)


		define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v1i64_post_imm_ld3:
		;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
		%ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 3
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
		}

		define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1i64_post_reg_ld3:
		;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
		}

		declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3.v1i64.p0i64(i64*)


		define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v4f32_post_imm_ld3:
		;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
		%ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 12
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float> } %ld3
		}

		define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4f32_post_reg_ld3:
		;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float> } %ld3
		}

		declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3.v4f32.p0f32(float*)


		define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v2f32_post_imm_ld3:
		;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
		%ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 6
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float> } %ld3
		}

		define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f32_post_reg_ld3:
		;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float> } %ld3
		}

		declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3.v2f32.p0f32(float*)


		define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v2f64_post_imm_ld3:
		;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
		%ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 6
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double> } %ld3
		}

		define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f64_post_reg_ld3:
		;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double> } %ld3
		}

		declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3.v2f64.p0f64(double*)


		define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v1f64_post_imm_ld3:
		;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
		%ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 3
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double> } %ld3
		}

		define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1f64_post_reg_ld3:
		;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double> } %ld3
		}

		declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3.v1f64.p0f64(double*)


		define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v16i8_post_imm_ld4:
		;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
		%ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 64
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
		}

		define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v16i8_post_reg_ld4:
		;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
		}

		declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4.v16i8.p0i8(i8*)


		define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v8i8_post_imm_ld4:
		;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
		%ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 32
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
		}

		define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i8_post_reg_ld4:
		;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
		}

		declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4.v8i8.p0i8(i8*)


		; ld4 of four <8 x i16> (64 bytes); %ptr receives %A + 32 x i16 = 64 bytes,
		; so the immediate post-indexed form (#64) is expected.
		define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v8i16_post_imm_ld4:
		;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
		%ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 32
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
		}

		; ld4 of four <8 x i16>; %ptr receives %A + %inc x i16 (runtime value),
		; so the register post-indexed form is expected.
		define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i16_post_reg_ld4:
		;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
		}

		declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4.v8i16.p0i16(i16*)


		; ld4 of four <4 x i16> (32 bytes); %ptr receives %A + 16 x i16 = 32 bytes,
		; so the immediate post-indexed form (#32) is expected.
		define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v4i16_post_imm_ld4:
		;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
		%ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 16
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
		}

		; ld4 of four <4 x i16>; %ptr receives %A + %inc x i16 (runtime value),
		; so the register post-indexed form is expected.
		define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i16_post_reg_ld4:
		;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
		}

		declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4.v4i16.p0i16(i16*)


		; ld4 of four <4 x i32> (64 bytes); %ptr receives %A + 16 x i32 = 64 bytes,
		; so the immediate post-indexed form (#64) is expected.
		define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v4i32_post_imm_ld4:
		;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
		%ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 16
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
		}

		; ld4 of four <4 x i32>; %ptr receives %A + %inc x i32 (runtime value),
		; so the register post-indexed form is expected.
		define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i32_post_reg_ld4:
		;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
		}

		declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4.v4i32.p0i32(i32*)


		; ld4 of four <2 x i32> (32 bytes); %ptr receives %A + 8 x i32 = 32 bytes,
		; so the immediate post-indexed form (#32) is expected.
		define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v2i32_post_imm_ld4:
		;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
		%ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 8
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
		}

		; ld4 of four <2 x i32>; %ptr receives %A + %inc x i32 (runtime value),
		; so the register post-indexed form is expected.
		define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i32_post_reg_ld4:
		;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
		}

		declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4.v2i32.p0i32(i32*)


		; ld4 of four <2 x i64> (64 bytes); %ptr receives %A + 8 x i64 = 64 bytes,
		; so the immediate post-indexed form (#64) is expected.
		define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v2i64_post_imm_ld4:
		;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
		%ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 8
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
		}

		; ld4 of four <2 x i64>; %ptr receives %A + %inc x i64 (runtime value),
		; so the register post-indexed form is expected.
		define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i64_post_reg_ld4:
		;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
		}

		declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4.v2i64.p0i64(i64*)


		; ld4 of four <1 x i64> (32 bytes); %ptr receives %A + 4 x i64 = 32 bytes.
		; The expected output is an immediate post-indexed ld1.1d (#32), not an ld4.
		define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v1i64_post_imm_ld4:
		;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
		%ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 4
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
		}

		; ld4 of four <1 x i64>; %ptr receives %A + %inc x i64 (runtime value).
		; The expected output is a register post-indexed ld1.1d, not an ld4.
		define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1i64_post_reg_ld4:
		;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
		}

		declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4.v1i64.p0i64(i64*)


		; ld4 of four <4 x float> (64 bytes); %ptr receives %A + 16 x float = 64 bytes,
		; so the immediate post-indexed form (#64) is expected.
		define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v4f32_post_imm_ld4:
		;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
		%ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 16
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
		}

		; ld4 of four <4 x float>; %ptr receives %A + %inc x float (runtime value),
		; so the register post-indexed form is expected.
		define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4f32_post_reg_ld4:
		;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
		}

		declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4.v4f32.p0f32(float*)


		; ld4 of four <2 x float> (32 bytes); %ptr receives %A + 8 x float = 32 bytes,
		; so the immediate post-indexed form (#32) is expected.
		define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v2f32_post_imm_ld4:
		;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
		%ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 8
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
		}

		; ld4 of four <2 x float>; %ptr receives %A + %inc x float (runtime value),
		; so the register post-indexed form is expected.
		define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f32_post_reg_ld4:
		;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
		}

		declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4.v2f32.p0f32(float*)


		; ld4 of four <2 x double> (64 bytes); %ptr receives %A + 8 x double = 64 bytes,
		; so the immediate post-indexed form (#64) is expected.
		define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v2f64_post_imm_ld4:
		;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
		%ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 8
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
		}

		; ld4 of four <2 x double>; %ptr receives %A + %inc x double (runtime value),
		; so the register post-indexed form is expected.
		define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f64_post_reg_ld4:
		;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
		}

		declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4.v2f64.p0f64(double*)


		; ld4 of four <1 x double> (32 bytes); %ptr receives %A + 4 x double = 32 bytes.
		; The expected output is an immediate post-indexed ld1.1d (#32), not an ld4.
		define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v1f64_post_imm_ld4:
		;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
		%ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 4
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
		}

		; ld4 of four <1 x double>; %ptr receives %A + %inc x double (runtime value).
		; The expected output is a register post-indexed ld1.1d, not an ld4.
		define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1f64_post_reg_ld4:
		;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
		}

		declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4.v1f64.p0f64(double*)

		; ld1x2 of two <16 x i8> (32 bytes); %ptr receives %A + 32 x i8 = 32 bytes,
		; so the immediate post-indexed two-register ld1 (#32) is expected.
		define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v16i8_post_imm_ld1x2:
		;CHECK: ld1.16b { v0, v1 }, [x0], #32
		%ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 32
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8> } %ld1x2
		}

		; ld1x2 of two <16 x i8>; %ptr receives %A + %inc x i8 (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v16i8_post_reg_ld1x2:
		;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8> } %ld1x2
		}

		declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x2.v16i8.p0i8(i8*)


		; ld1x2 of two <8 x i8> (16 bytes); %ptr receives %A + 16 x i8 = 16 bytes,
		; so the immediate post-indexed two-register ld1 (#16) is expected.
		define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v8i8_post_imm_ld1x2:
		;CHECK: ld1.8b { v0, v1 }, [x0], #16
		%ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 16
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8> } %ld1x2
		}

		; ld1x2 of two <8 x i8>; %ptr receives %A + %inc x i8 (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i8_post_reg_ld1x2:
		;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8> } %ld1x2
		}

		declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x2.v8i8.p0i8(i8*)


		; ld1x2 of two <8 x i16> (32 bytes); %ptr receives %A + 16 x i16 = 32 bytes,
		; so the immediate post-indexed two-register ld1 (#32) is expected.
		define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v8i16_post_imm_ld1x2:
		;CHECK: ld1.8h { v0, v1 }, [x0], #32
		%ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 16
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16> } %ld1x2
		}

		; ld1x2 of two <8 x i16>; %ptr receives %A + %inc x i16 (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i16_post_reg_ld1x2:
		;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16> } %ld1x2
		}

		declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x2.v8i16.p0i16(i16*)


		; ld1x2 of two <4 x i16> (16 bytes); %ptr receives %A + 8 x i16 = 16 bytes,
		; so the immediate post-indexed two-register ld1 (#16) is expected.
		define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v4i16_post_imm_ld1x2:
		;CHECK: ld1.4h { v0, v1 }, [x0], #16
		%ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 8
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16> } %ld1x2
		}

		; ld1x2 of two <4 x i16>; %ptr receives %A + %inc x i16 (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i16_post_reg_ld1x2:
		;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16> } %ld1x2
		}

		declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x2.v4i16.p0i16(i16*)


		; ld1x2 of two <4 x i32> (32 bytes); %ptr receives %A + 8 x i32 = 32 bytes,
		; so the immediate post-indexed two-register ld1 (#32) is expected.
		define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v4i32_post_imm_ld1x2:
		;CHECK: ld1.4s { v0, v1 }, [x0], #32
		%ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 8
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32> } %ld1x2
		}

		; ld1x2 of two <4 x i32>; %ptr receives %A + %inc x i32 (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i32_post_reg_ld1x2:
		;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32> } %ld1x2
		}

		declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x2.v4i32.p0i32(i32*)


		; ld1x2 of two <2 x i32> (16 bytes); %ptr receives %A + 4 x i32 = 16 bytes,
		; so the immediate post-indexed two-register ld1 (#16) is expected.
		define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v2i32_post_imm_ld1x2:
		;CHECK: ld1.2s { v0, v1 }, [x0], #16
		%ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 4
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32> } %ld1x2
		}

		; ld1x2 of two <2 x i32>; %ptr receives %A + %inc x i32 (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
		;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32> } %ld1x2
		}

		declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x2.v2i32.p0i32(i32*)


		; ld1x2 of two <2 x i64> (32 bytes); %ptr receives %A + 4 x i64 = 32 bytes,
		; so the immediate post-indexed two-register ld1 (#32) is expected.
		define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
		;CHECK: ld1.2d { v0, v1 }, [x0], #32
		%ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 4
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64> } %ld1x2
		}

		; ld1x2 of two <2 x i64>; %ptr receives %A + %inc x i64 (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
		;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64> } %ld1x2
		}

		declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x2.v2i64.p0i64(i64*)


		; ld1x2 of two <1 x i64> (16 bytes); %ptr receives %A + 2 x i64 = 16 bytes,
		; so the immediate post-indexed two-register ld1 (#16) is expected.
		define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
		;CHECK: ld1.1d { v0, v1 }, [x0], #16
		%ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 2
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64> } %ld1x2
		}

		; ld1x2 of two <1 x i64>; %ptr receives %A + %inc x i64 (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
		;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64> } %ld1x2
		}

		declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x2.v1i64.p0i64(i64*)


		; ld1x2 of two <4 x float> (32 bytes); %ptr receives %A + 8 x float = 32 bytes,
		; so the immediate post-indexed two-register ld1 (#32) is expected.
		define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
		;CHECK: ld1.4s { v0, v1 }, [x0], #32
		%ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 8
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float> } %ld1x2
		}

		; ld1x2 of two <4 x float>; %ptr receives %A + %inc x float (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
		;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x2.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float> } %ld1x2
		}

		declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x2.v4f32.p0f32(float*)


		; ld1x2 of two <2 x float> (16 bytes); %ptr receives %A + 4 x float = 16 bytes,
		; so the immediate post-indexed two-register ld1 (#16) is expected.
		define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
		;CHECK: ld1.2s { v0, v1 }, [x0], #16
		%ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 4
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float> } %ld1x2
		}

		; ld1x2 of two <2 x float>; %ptr receives %A + %inc x float (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
		;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x2.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float> } %ld1x2
		}

		declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x2.v2f32.p0f32(float*)


		; ld1x2 of two <2 x double> (32 bytes); %ptr receives %A + 4 x double = 32 bytes,
		; so the immediate post-indexed two-register ld1 (#32) is expected.
		define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
		;CHECK: ld1.2d { v0, v1 }, [x0], #32
		%ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 4
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double> } %ld1x2
		}

		; ld1x2 of two <2 x double>; %ptr receives %A + %inc x double (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f64_post_reg_ld1x2:
		;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x2.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double> } %ld1x2
		}

		declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x2.v2f64.p0f64(double*)


		; ld1x2 of two <1 x double> (16 bytes); %ptr receives %A + 2 x double = 16 bytes,
		; so the immediate post-indexed two-register ld1 (#16) is expected.
		define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v1f64_post_imm_ld1x2:
		;CHECK: ld1.1d { v0, v1 }, [x0], #16
		%ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 2
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double> } %ld1x2
		}

		; ld1x2 of two <1 x double>; %ptr receives %A + %inc x double (runtime value),
		; so the register post-indexed two-register ld1 is expected.
		define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1f64_post_reg_ld1x2:
		;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x2.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double> } %ld1x2
		}

		declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x2.v1f64.p0f64(double*)


		; ld1x3 of three <16 x i8> (48 bytes); %ptr receives %A + 48 x i8 = 48 bytes,
		; so the immediate post-indexed three-register ld1 (#48) is expected.
		define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v16i8_post_imm_ld1x3:
		;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48
		%ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 48
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
		}

		; ld1x3 of three <16 x i8>; %ptr receives %A + %inc x i8 (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v16i8_post_reg_ld1x3:
		;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
		}

		declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x3.v16i8.p0i8(i8*)


		; ld1x3 of three <8 x i8> (24 bytes); %ptr receives %A + 24 x i8 = 24 bytes,
		; so the immediate post-indexed three-register ld1 (#24) is expected.
		define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v8i8_post_imm_ld1x3:
		;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24
		%ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 24
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
		}

		; ld1x3 of three <8 x i8>; %ptr receives %A + %inc x i8 (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i8_post_reg_ld1x3:
		;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
		}

		declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x3.v8i8.p0i8(i8*)


		; ld1x3 of three <8 x i16> (48 bytes); %ptr receives %A + 24 x i16 = 48 bytes,
		; so the immediate post-indexed three-register ld1 (#48) is expected.
		define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v8i16_post_imm_ld1x3:
		;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48
		%ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 24
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
		}

		; ld1x3 of three <8 x i16>; %ptr receives %A + %inc x i16 (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i16_post_reg_ld1x3:
		;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
		}

		declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x3.v8i16.p0i16(i16*)


		; ld1x3 of three <4 x i16> (24 bytes); %ptr receives %A + 12 x i16 = 24 bytes,
		; so the immediate post-indexed three-register ld1 (#24) is expected.
		define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v4i16_post_imm_ld1x3:
		;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24
		%ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 12
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
		}

		; ld1x3 of three <4 x i16>; %ptr receives %A + %inc x i16 (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i16_post_reg_ld1x3:
		;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
		}

		declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x3.v4i16.p0i16(i16*)


		; ld1x3 of three <4 x i32> (48 bytes); %ptr receives %A + 12 x i32 = 48 bytes,
		; so the immediate post-indexed three-register ld1 (#48) is expected.
		define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v4i32_post_imm_ld1x3:
		;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
		%ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 12
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
		}

		; ld1x3 of three <4 x i32>; %ptr receives %A + %inc x i32 (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i32_post_reg_ld1x3:
		;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
		}

		declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x3.v4i32.p0i32(i32*)


		; ld1x3 of three <2 x i32> (24 bytes); %ptr receives %A + 6 x i32 = 24 bytes,
		; so the immediate post-indexed three-register ld1 (#24) is expected.
		define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v2i32_post_imm_ld1x3:
		;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
		%ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 6
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
		}

		; ld1x3 of three <2 x i32>; %ptr receives %A + %inc x i32 (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i32_post_reg_ld1x3:
		;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
		}

		declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x3.v2i32.p0i32(i32*)


		; ld1x3 of three <2 x i64> (48 bytes); %ptr receives %A + 6 x i64 = 48 bytes,
		; so the immediate post-indexed three-register ld1 (#48) is expected.
		define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v2i64_post_imm_ld1x3:
		;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
		%ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 6
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
		}

		; ld1x3 of three <2 x i64>; %ptr receives %A + %inc x i64 (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i64_post_reg_ld1x3:
		;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
		}

		declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x3.v2i64.p0i64(i64*)


		; ld1x3 of three <1 x i64> (24 bytes); %ptr receives %A + 3 x i64 = 24 bytes,
		; so the immediate post-indexed three-register ld1 (#24) is expected.
		define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v1i64_post_imm_ld1x3:
		;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
		%ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 3
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
		}

		; ld1x3 of three <1 x i64>; %ptr receives %A + %inc x i64 (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1i64_post_reg_ld1x3:
		;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
		}

		declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x3.v1i64.p0i64(i64*)


		; ld1x3 of three <4 x float> (48 bytes); %ptr receives %A + 12 x float = 48 bytes,
		; so the immediate post-indexed three-register ld1 (#48) is expected.
		define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v4f32_post_imm_ld1x3:
		;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
		%ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 12
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
		}

		; ld1x3 of three <4 x float>; %ptr receives %A + %inc x float (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4f32_post_reg_ld1x3:
		;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x3.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
		}

		declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x3.v4f32.p0f32(float*)


		; ld1x3 of three <2 x float> (24 bytes); %ptr receives %A + 6 x float = 24 bytes,
		; so the immediate post-indexed three-register ld1 (#24) is expected.
		define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v2f32_post_imm_ld1x3:
		;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
		%ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 6
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
		}

		; ld1x3 of three <2 x float>; %ptr receives %A + %inc x float (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f32_post_reg_ld1x3:
		;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x3.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
		}

		declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x3.v2f32.p0f32(float*)


		; ld1x3 of three <2 x double> (48 bytes); %ptr receives %A + 6 x double = 48 bytes,
		; so the immediate post-indexed three-register ld1 (#48) is expected.
		define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v2f64_post_imm_ld1x3:
		;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
		%ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 6
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
		}

		; ld1x3 of three <2 x double>; %ptr receives %A + %inc x double (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f64_post_reg_ld1x3:
		;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x3.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
		}

		declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x3.v2f64.p0f64(double*)


		; ld1x3 of three <1 x double> (24 bytes); %ptr receives %A + 3 x double = 24 bytes,
		; so the immediate post-indexed three-register ld1 (#24) is expected.
		define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v1f64_post_imm_ld1x3:
		;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
		%ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 3
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
		}

		; ld1x3 of three <1 x double>; %ptr receives %A + %inc x double (runtime value),
		; so the register post-indexed three-register ld1 is expected.
		define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1f64_post_reg_ld1x3:
		;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x3.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
		}

		declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x3.v1f64.p0f64(double*)


		; ld1x4 of four <16 x i8> (64 bytes); %ptr receives %A + 64 x i8 = 64 bytes,
		; so the immediate post-indexed four-register ld1 (#64) is expected.
		define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v16i8_post_imm_ld1x4:
		;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64
		%ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 64
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
		}

		; ld1x4 on <16 x i8>, register post-index: %A + %inc x i8 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v16i8_post_reg_ld1x4:
		;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
		}

		; ld1x4 intrinsic for <16 x i8>, called by the v16i8 ld1x4 tests.
		declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld1x4.v16i8.p0i8(i8*)


		; ld1x4 on <8 x i8>, immediate post-index: %A + 32 x i8 (32 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#32".
		define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
		;CHECK-LABEL: test_v8i8_post_imm_ld1x4:
		;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32
		%ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 32
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
		}

		; ld1x4 on <8 x i8>, register post-index: %A + %inc x i8 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i8_post_reg_ld1x4:
		;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
		}

		; ld1x4 intrinsic for <8 x i8>, called by the v8i8 ld1x4 tests.
		declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld1x4.v8i8.p0i8(i8*)


		; ld1x4 on <8 x i16>, immediate post-index: %A + 32 x i16 (64 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#64".
		define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v8i16_post_imm_ld1x4:
		;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64
		%ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 32
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
		}

		; ld1x4 on <8 x i16>, register post-index: %A + %inc x i16 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v8i16_post_reg_ld1x4:
		;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
		}

		; ld1x4 intrinsic for <8 x i16>, called by the v8i16 ld1x4 tests.
		declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld1x4.v8i16.p0i16(i16*)


		; ld1x4 on <4 x i16>, immediate post-index: %A + 16 x i16 (32 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#32".
		define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
		;CHECK-LABEL: test_v4i16_post_imm_ld1x4:
		;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32
		%ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 16
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
		}

		; ld1x4 on <4 x i16>, register post-index: %A + %inc x i16 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i16_post_reg_ld1x4:
		;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
		}

		; ld1x4 intrinsic for <4 x i16>, called by the v4i16 ld1x4 tests.
		declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld1x4.v4i16.p0i16(i16*)


		; ld1x4 on <4 x i32>, immediate post-index: %A + 16 x i32 (64 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#64".
		define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v4i32_post_imm_ld1x4:
		;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
		%ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 16
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
		}

		; ld1x4 on <4 x i32>, register post-index: %A + %inc x i32 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4i32_post_reg_ld1x4:
		;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
		}

		; ld1x4 intrinsic for <4 x i32>, called by the v4i32 ld1x4 tests.
		declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld1x4.v4i32.p0i32(i32*)


		; ld1x4 on <2 x i32>, immediate post-index: %A + 8 x i32 (32 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#32".
		define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
		;CHECK-LABEL: test_v2i32_post_imm_ld1x4:
		;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
		%ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 8
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
		}

		; ld1x4 on <2 x i32>, register post-index: %A + %inc x i32 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i32_post_reg_ld1x4:
		;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
		}

		; ld1x4 intrinsic for <2 x i32>, called by the v2i32 ld1x4 tests.
		declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld1x4.v2i32.p0i32(i32*)


		; ld1x4 on <2 x i64>, immediate post-index: %A + 8 x i64 (64 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#64".
		define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v2i64_post_imm_ld1x4:
		;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
		%ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 8
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
		}

		; ld1x4 on <2 x i64>, register post-index: %A + %inc x i64 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2i64_post_reg_ld1x4:
		;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
		}

		; ld1x4 intrinsic for <2 x i64>, called by the v2i64 ld1x4 tests.
		declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld1x4.v2i64.p0i64(i64*)


		; ld1x4 on <1 x i64>, immediate post-index: %A + 4 x i64 (32 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#32".
		define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
		;CHECK-LABEL: test_v1i64_post_imm_ld1x4:
		;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
		%ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 4
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
		}

		; ld1x4 on <1 x i64>, register post-index: %A + %inc x i64 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1i64_post_reg_ld1x4:
		;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
		}

		; ld1x4 intrinsic for <1 x i64>, called by the v1i64 ld1x4 tests.
		declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld1x4.v1i64.p0i64(i64*)


		; ld1x4 on <4 x float>, immediate post-index: %A + 16 x float (64 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#64".
		define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v4f32_post_imm_ld1x4:
		;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
		%ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 16
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
		}

		; ld1x4 on <4 x float>, register post-index: %A + %inc x float is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v4f32_post_reg_ld1x4:
		;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x4.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
		}

		; ld1x4 intrinsic for <4 x float>, called by the v4f32 ld1x4 tests.
		declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld1x4.v4f32.p0f32(float*)


		; ld1x4 on <2 x float>, immediate post-index: %A + 8 x float (32 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#32".
		define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) {
		;CHECK-LABEL: test_v2f32_post_imm_ld1x4:
		;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
		%ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 8
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
		}

		; ld1x4 on <2 x float>, register post-index: %A + %inc x float is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f32_post_reg_ld1x4:
		;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x4.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
		}

		; ld1x4 intrinsic for <2 x float>, called by the v2f32 ld1x4 tests.
		declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld1x4.v2f32.p0f32(float*)


		; ld1x4 on <2 x double>, immediate post-index: %A + 8 x double (64 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#64".
		define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v2f64_post_imm_ld1x4:
		;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
		%ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 8
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
		}

		; ld1x4 on <2 x double>, register post-index: %A + %inc x double is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v2f64_post_reg_ld1x4:
		;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x4.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
		}

		; ld1x4 intrinsic for <2 x double>, called by the v2f64 ld1x4 tests.
		declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld1x4.v2f64.p0f64(double*)


		; ld1x4 on <1 x double>, immediate post-index: %A + 4 x double (32 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#32".
		define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) {
		;CHECK-LABEL: test_v1f64_post_imm_ld1x4:
		;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
		%ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 4
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
		}

		; ld1x4 on <1 x double>, register post-index: %A + %inc x double is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
		;CHECK-LABEL: test_v1f64_post_reg_ld1x4:
		;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x4.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
		}

		; ld1x4 intrinsic for <1 x double>, called by the v1f64 ld1x4 tests.
		declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld1x4.v1f64.p0f64(double*)


		; ld2r on <16 x i8>, immediate post-index: %A + 2 x i8 (2 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#2".
		define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_ld2r:
		;CHECK: ld2r.16b { v0, v1 }, [x0], #2
		%ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 2
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8> } %ld2
		}

		; ld2r on <16 x i8>, register post-index: %A + %inc x i8 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_ld2r:
		;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2r.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8> } %ld2
		}

		; ld2r intrinsic for <16 x i8>, called by the v16i8 ld2r tests.
		declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly


		; ld2r on <8 x i8>, immediate post-index: %A + 2 x i8 (2 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#2".
		define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_ld2r:
		;CHECK: ld2r.8b { v0, v1 }, [x0], #2
		%ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 2
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8> } %ld2
		}

		; ld2r on <8 x i8>, register post-index: %A + %inc x i8 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_ld2r:
		;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2r.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8> } %ld2
		}

		; ld2r intrinsic for <8 x i8>, called by the v8i8 ld2r tests.
		declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly


		; ld2r on <8 x i16>, immediate post-index: %A + 2 x i16 (4 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#4".
		define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_ld2r:
		;CHECK: ld2r.8h { v0, v1 }, [x0], #4
		%ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 2
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16> } %ld2
		}

		; ld2r on <8 x i16>, register post-index: %A + %inc x i16 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_ld2r:
		;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2r.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16> } %ld2
		}

		; ld2r intrinsic for <8 x i16>, called by the v8i16 ld2r tests.
		declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly


		; ld2r on <4 x i16>, immediate post-index: %A + 2 x i16 (4 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#4".
		define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_ld2r:
		;CHECK: ld2r.4h { v0, v1 }, [x0], #4
		%ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 2
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16> } %ld2
		}

		; ld2r on <4 x i16>, register post-index: %A + %inc x i16 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_ld2r:
		;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2r.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16> } %ld2
		}

		; ld2r intrinsic for <4 x i16>, called by the v4i16 ld2r tests.
		declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly


		; ld2r on <4 x i32>, immediate post-index: %A + 2 x i32 (8 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#8".
		define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_ld2r:
		;CHECK: ld2r.4s { v0, v1 }, [x0], #8
		%ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 2
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32> } %ld2
		}

		; ld2r on <4 x i32>, register post-index: %A + %inc x i32 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_ld2r:
		;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2r.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32> } %ld2
		}

		; ld2r intrinsic for <4 x i32>, called by the v4i32 ld2r tests.
		declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly

		; ld2r on <2 x i32>, immediate post-index: %A + 2 x i32 (8 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#8".
		define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_ld2r:
		;CHECK: ld2r.2s { v0, v1 }, [x0], #8
		%ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 2
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32> } %ld2
		}

		; ld2r on <2 x i32>, register post-index: %A + %inc x i32 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_ld2r:
		;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2r.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32> } %ld2
		}

		; ld2r intrinsic for <2 x i32>, called by the v2i32 ld2r tests.
		declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly


		; ld2r on <2 x i64>, immediate post-index: %A + 2 x i64 (16 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#16".
		define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_ld2r:
		;CHECK: ld2r.2d { v0, v1 }, [x0], #16
		%ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 2
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64> } %ld2
		}

		; ld2r on <2 x i64>, register post-index: %A + %inc x i64 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_ld2r:
		;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2r.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64> } %ld2
		}

		; ld2r intrinsic for <2 x i64>, called by the v2i64 ld2r tests.
		declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly

		; ld2r on <1 x i64>, immediate post-index: %A + 2 x i64 (16 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#16".
		define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_ld2r:
		;CHECK: ld2r.1d { v0, v1 }, [x0], #16
		%ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2r.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 2
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64> } %ld2
		}

		; ld2r on <1 x i64>, register post-index: %A + %inc x i64 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_ld2r:
		;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2r.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64> } %ld2
		}

		; ld2r intrinsic for <1 x i64>, called by the v1i64 ld2r tests.
		declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly


		; ld2r on <4 x float>, immediate post-index: %A + 2 x float (8 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#8".
		define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_ld2r:
		;CHECK: ld2r.4s { v0, v1 }, [x0], #8
		%ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2r.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 2
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float> } %ld2
		}

		; ld2r on <4 x float>, register post-index: %A + %inc x float is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_ld2r:
		;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2r.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float> } %ld2
		}

		; ld2r intrinsic for <4 x float>, called by the v4f32 ld2r tests.
		declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly

		; ld2r on <2 x float>, immediate post-index: %A + 2 x float (8 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#8".
		define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_ld2r:
		;CHECK: ld2r.2s { v0, v1 }, [x0], #8
		%ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2r.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 2
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float> } %ld2
		}

		; ld2r on <2 x float>, register post-index: %A + %inc x float is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_ld2r:
		;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2r.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float> } %ld2
		}

		; ld2r intrinsic for <2 x float>, called by the v2f32 ld2r tests.
		declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly


		; ld2r on <2 x double>, immediate post-index: %A + 2 x double (16 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#16".
		define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_ld2r:
		;CHECK: ld2r.2d { v0, v1 }, [x0], #16
		%ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2r.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 2
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double> } %ld2
		}

		; ld2r on <2 x double>, register post-index: %A + %inc x double is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_ld2r:
		;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2r.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double> } %ld2
		}

		; ld2r intrinsic for <2 x double>, called by the v2f64 ld2r tests.
		declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly

		; ld2r on <1 x double>, immediate post-index: %A + 2 x double (16 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#16".
		define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_ld2r:
		;CHECK: ld2r.1d { v0, v1 }, [x0], #16
		%ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2r.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 2
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double> } %ld2
		}

		; ld2r on <1 x double>, register post-index: %A + %inc x double is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_ld2r:
		;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
		%ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2r.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double> } %ld2
		}

		; ld2r intrinsic for <1 x double>, called by the v1f64 ld2r tests.
		declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly


		; ld3r on <16 x i8>, immediate post-index: %A + 3 x i8 (3 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#3".
		define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_ld3r:
		;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3
		%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 3
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
		}

		; ld3r on <16 x i8>, register post-index: %A + %inc x i8 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_ld3r:
		;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3r.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
		}

		; ld3r intrinsic for <16 x i8>, called by the v16i8 ld3r tests.
		declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly


		; ld3r on <8 x i8>, immediate post-index: %A + 3 x i8 (3 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#3".
		define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_ld3r:
		;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3
		%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 3
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
		}

		; ld3r on <8 x i8>, register post-index: %A + %inc x i8 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_ld3r:
		;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3r.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
		}

		; ld3r intrinsic for <8 x i8>, called by the v8i8 ld3r tests.
		declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly


		; ld3r on <8 x i16>, immediate post-index: %A + 3 x i16 (6 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#6".
		define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_ld3r:
		;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6
		%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 3
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
		}

		; ld3r on <8 x i16>, register post-index: %A + %inc x i16 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_ld3r:
		;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3r.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
		}

		; ld3r intrinsic for <8 x i16>, called by the v8i16 ld3r tests.
		declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly


		; ld3r on <4 x i16>, immediate post-index: %A + 3 x i16 (6 bytes) is stored to
		; %ptr, so the load is expected to fold the update as "#6".
		define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_ld3r:
		;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6
		%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 3
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
		}

		; ld3r on <4 x i16>, register post-index: %A + %inc x i16 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_ld3r:
		;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3r.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
		}

		; ld3r intrinsic for <4 x i16>, called by the v4i16 ld3r tests.
		declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly


		; ld3r on <4 x i32>, immediate post-index: %A + 3 x i32 (12 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#12".
		define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_ld3r:
		;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
		%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 3
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
		}

		; ld3r on <4 x i32>, register post-index: %A + %inc x i32 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_ld3r:
		;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3r.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
		}

		; ld3r intrinsic for <4 x i32>, called by the v4i32 ld3r tests.
		declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly

		; ld3r on <2 x i32>, immediate post-index: %A + 3 x i32 (12 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#12".
		define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_ld3r:
		;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
		%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 3
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
		}

		; ld3r on <2 x i32>, register post-index: %A + %inc x i32 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_ld3r:
		;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3r.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
		}

		; ld3r intrinsic for <2 x i32>, called by the v2i32 ld3r tests.
		declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly


		; ld3r on <2 x i64>, immediate post-index: %A + 3 x i64 (24 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#24".
		define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_ld3r:
		;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
		%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 3
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
		}

		; ld3r on <2 x i64>, register post-index: %A + %inc x i64 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_ld3r:
		;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3r.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
		}

		; ld3r intrinsic for <2 x i64>, called by the v2i64 ld3r tests.
		declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly

		; ld3r on <1 x i64>, immediate post-index: %A + 3 x i64 (24 bytes) is stored
		; to %ptr, so the load is expected to fold the update as "#24".
		define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_ld3r:
		;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
		%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3r.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 3
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
		}

		; ld3r on <1 x i64>, register post-index: %A + %inc x i64 is stored to %ptr,
		; so the load is expected to take its increment from an x register.
		define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_ld3r:
		;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3r.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
		}

		; ld3r intrinsic for <1 x i64>, called by the v1i64 ld3r tests.
		declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly


		; ld3r on <4 x float>, immediate post-index: %A + 3 x float (12 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#12".
		define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_ld3r:
		;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
		%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3r.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 3
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float> } %ld3
		}

		; ld3r on <4 x float>, register post-index: %A + %inc x float is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_ld3r:
		;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3r.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float> } %ld3
		}

		; ld3r intrinsic for <4 x float>, called by the v4f32 ld3r tests.
		declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly

		; ld3r on <2 x float>, immediate post-index: %A + 3 x float (12 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#12".
		define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_ld3r:
		;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
		%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3r.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 3
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float> } %ld3
		}

		; ld3r on <2 x float>, register post-index: %A + %inc x float is stored to
		; %ptr, so the load is expected to take its increment from an x register.
		define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_ld3r:
		;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3r.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float> } %ld3
		}

		; ld3r intrinsic for <2 x float>, called by the v2f32 ld3r tests.
		declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly


		; ld3r on <2 x double>, immediate post-index: %A + 3 x double (24 bytes) is
		; stored to %ptr, so the load is expected to fold the update as "#24".
		define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_ld3r:
		;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
		%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3r.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 3
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double> } %ld3
		}

		; ld3r post-index, register form: replicating load of one 3-element double structure into three <2 x double> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_ld3r:
		;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3r.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double> } %ld3
		}

		declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly

		; ld3r post-index, immediate form: replicating load of one 3-element double structure into three <1 x double> results.
		; %A is advanced by 3 doubles (24 bytes) and stored to %ptr; the writeback is expected as the immediate #24.
		define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_ld3r:
		;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
		%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3r.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 3
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double> } %ld3
		}

		; ld3r post-index, register form: replicating load of one 3-element double structure into three <1 x double> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_ld3r:
		;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3r.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double> } %ld3
		}

		declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly


		; ld4r post-index, immediate form: replicating load of one 4-element i8 structure into four <16 x i8> results.
		; %A is advanced by 4 bytes and stored to %ptr; the writeback is expected as the immediate #4.
		define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_ld4r:
		;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4
		%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 4
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element i8 structure into four <16 x i8> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_ld4r:
		;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4r.v16i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
		}

		declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly


		; ld4r post-index, immediate form: replicating load of one 4-element i8 structure into four <8 x i8> results.
		; %A is advanced by 4 bytes and stored to %ptr; the writeback is expected as the immediate #4.
		define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_ld4r:
		;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4
		%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i32 4
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element i8 structure into four <8 x i8> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_ld4r:
		;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4r.v8i8.p0i8(i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
		}

		declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly


		; ld4r post-index, immediate form: replicating load of one 4-element i16 structure into four <8 x i16> results.
		; %A is advanced by 4 i16 elements (8 bytes) and stored to %ptr; the writeback is expected as the immediate #8.
		define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_ld4r:
		;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8
		%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 4
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element i16 structure into four <8 x i16> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_ld4r:
		;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4r.v8i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
		}

		declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly


		; ld4r post-index, immediate form: replicating load of one 4-element i16 structure into four <4 x i16> results.
		; %A is advanced by 4 i16 elements (8 bytes) and stored to %ptr; the writeback is expected as the immediate #8.
		define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_ld4r:
		;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8
		%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i32 4
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element i16 structure into four <4 x i16> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_ld4r:
		;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4r.v4i16.p0i16(i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
		}

		declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly


		; ld4r post-index, immediate form: replicating load of one 4-element i32 structure into four <4 x i32> results.
		; %A is advanced by 4 i32 elements (16 bytes) and stored to %ptr; the writeback is expected as the immediate #16.
		define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_ld4r:
		;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
		%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 4
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element i32 structure into four <4 x i32> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_ld4r:
		;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4r.v4i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
		}

		declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly

		; ld4r post-index, immediate form: replicating load of one 4-element i32 structure into four <2 x i32> results.
		; %A is advanced by 4 i32 elements (16 bytes) and stored to %ptr; the writeback is expected as the immediate #16.
		define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_ld4r:
		;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
		%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i32 4
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element i32 structure into four <2 x i32> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_ld4r:
		;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4r.v2i32.p0i32(i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
		}

		declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly


		; ld4r post-index, immediate form: replicating load of one 4-element i64 structure into four <2 x i64> results.
		; %A is advanced by 4 i64 elements (32 bytes) and stored to %ptr; the writeback is expected as the immediate #32.
		define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_ld4r:
		;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
		%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 4
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element i64 structure into four <2 x i64> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_ld4r:
		;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4r.v2i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
		}

		declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly

		; ld4r post-index, immediate form: replicating load of one 4-element i64 structure into four <1 x i64> results.
		; %A is advanced by 4 i64 elements (32 bytes) and stored to %ptr; the writeback is expected as the immediate #32.
		define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_ld4r:
		;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
		%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4r.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i32 4
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element i64 structure into four <1 x i64> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_ld4r:
		;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4r.v1i64.p0i64(i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
		}

		declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly


		; ld4r post-index, immediate form: replicating load of one 4-element float structure into four <4 x float> results.
		; %A is advanced by 4 floats (16 bytes) and stored to %ptr; the writeback is expected as the immediate #16.
		define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_ld4r:
		;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
		%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4r.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 4
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element float structure into four <4 x float> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_ld4r:
		;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4r.v4f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
		}

		declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly

		; ld4r post-index, immediate form: replicating load of one 4-element float structure into four <2 x float> results.
		; %A is advanced by 4 floats (16 bytes) and stored to %ptr; the writeback is expected as the immediate #16.
		define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_ld4r:
		;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
		%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4r.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i32 4
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element float structure into four <2 x float> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_ld4r:
		;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4r.v2f32.p0f32(float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
		}

		declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly


		; ld4r post-index, immediate form: replicating load of one 4-element double structure into four <2 x double> results.
		; %A is advanced by 4 doubles (32 bytes) and stored to %ptr; the writeback is expected as the immediate #32.
		define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_ld4r:
		;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
		%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4r.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 4
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element double structure into four <2 x double> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_ld4r:
		;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4r.v2f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
		}

		declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly

		; ld4r post-index, immediate form: replicating load of one 4-element double structure into four <1 x double> results.
		; %A is advanced by 4 doubles (32 bytes) and stored to %ptr; the writeback is expected as the immediate #32.
		define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_ld4r:
		;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
		%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4r.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i32 4
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
		}

		; ld4r post-index, register form: replicating load of one 4-element double structure into four <1 x double> results.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_ld4r:
		;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4r.v1f64.p0f64(double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
		}

		declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <16 x i8> operands %B, %C with lane index 0.
		; %A is advanced by 2 bytes and stored to %ptr; the writeback is expected as the immediate #2.
		define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_ld2lane:
		;CHECK: ld2.b { v0, v1 }[0], [x0], #2
		%ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 2
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <16 x i8> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_ld2lane:
		;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8> } %ld2
		}

		declare { <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <8 x i8> operands %B, %C with lane index 0.
		; %A is advanced by 2 bytes and stored to %ptr; the writeback is expected as the immediate #2.
		define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_ld2lane:
		;CHECK: ld2.b { v0, v1 }[0], [x0], #2
		%ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 2
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <8 x i8> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_ld2lane:
		;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8> } %ld2
		}

		declare { <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <8 x i16> operands %B, %C with lane index 0.
		; %A is advanced by 2 i16 elements (4 bytes) and stored to %ptr; the writeback is expected as the immediate #4.
		define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_ld2lane:
		;CHECK: ld2.h { v0, v1 }[0], [x0], #4
		%ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 2
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <8 x i16> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_ld2lane:
		;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16> } %ld2
		}

		declare { <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <4 x i16> operands %B, %C with lane index 0.
		; %A is advanced by 2 i16 elements (4 bytes) and stored to %ptr; the writeback is expected as the immediate #4.
		define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_ld2lane:
		;CHECK: ld2.h { v0, v1 }[0], [x0], #4
		%ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 2
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <4 x i16> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_ld2lane:
		;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16> } %ld2
		}

		declare { <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <4 x i32> operands %B, %C with lane index 0.
		; %A is advanced by 2 i32 elements (8 bytes) and stored to %ptr; the writeback is expected as the immediate #8.
		define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_ld2lane:
		;CHECK: ld2.s { v0, v1 }[0], [x0], #8
		%ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 2
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <4 x i32> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_ld2lane:
		;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32> } %ld2
		}

		declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <2 x i32> operands %B, %C with lane index 0.
		; %A is advanced by 2 i32 elements (8 bytes) and stored to %ptr; the writeback is expected as the immediate #8.
		define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_ld2lane:
		;CHECK: ld2.s { v0, v1 }[0], [x0], #8
		%ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 2
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <2 x i32> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_ld2lane:
		;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32> } %ld2
		}

		declare { <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <2 x i64> operands %B, %C with lane index 0.
		; %A is advanced by 2 i64 elements (16 bytes) and stored to %ptr; the writeback is expected as the immediate #16.
		define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_ld2lane:
		;CHECK: ld2.d { v0, v1 }[0], [x0], #16
		%ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i32 2
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <2 x i64> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_ld2lane:
		;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64> } %ld2
		}

		declare { <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <1 x i64> operands %B, %C with lane index 0.
		; %A is advanced by 2 i64 elements (16 bytes) and stored to %ptr; the writeback is expected as the immediate #16.
		define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_ld2lane:
		;CHECK: ld2.d { v0, v1 }[0], [x0], #16
		%ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i32 2
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <1 x i64> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_ld2lane:
		;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64> } %ld2
		}

		declare { <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <4 x float> operands %B, %C with lane index 0.
		; %A is advanced by 2 floats (8 bytes) and stored to %ptr; the writeback is expected as the immediate #8.
		define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_ld2lane:
		;CHECK: ld2.s { v0, v1 }[0], [x0], #8
		%ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 2
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <4 x float> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_ld2lane:
		;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float> } %ld2
		}

		declare { <4 x float>, <4 x float> } @llvm.arm64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <2 x float> operands %B, %C with lane index 0.
		; %A is advanced by 2 floats (8 bytes) and stored to %ptr; the writeback is expected as the immediate #8.
		define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_ld2lane:
		;CHECK: ld2.s { v0, v1 }[0], [x0], #8
		%ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 2
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <2 x float> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_ld2lane:
		;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float> } %ld2
		}

		declare { <2 x float>, <2 x float> } @llvm.arm64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <2 x double> operands %B, %C with lane index 0.
		; %A is advanced by 2 doubles (16 bytes) and stored to %ptr; the writeback is expected as the immediate #16.
		define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_ld2lane:
		;CHECK: ld2.d { v0, v1 }[0], [x0], #16
		%ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
		%tmp = getelementptr double* %A, i32 2
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <2 x double> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_ld2lane:
		;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double> } %ld2
		}

		declare { <2 x double>, <2 x double> } @llvm.arm64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly


		; ld2 single-lane post-index, immediate form: the ld2lane intrinsic is called on <1 x double> operands %B, %C with lane index 0.
		; %A is advanced by 2 doubles (16 bytes) and stored to %ptr; the writeback is expected as the immediate #16.
		define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_ld2lane:
		;CHECK: ld2.d { v0, v1 }[0], [x0], #16
		%ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
		%tmp = getelementptr double* %A, i32 2
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double> } %ld2
		}

		; ld2 single-lane post-index, register form: the ld2lane intrinsic is called on <1 x double> operands %B, %C with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_ld2lane:
		;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
		%ld2 = call { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double> } %ld2
		}

		declare { <1 x double>, <1 x double> } @llvm.arm64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly


		; ld3 single-lane post-index, immediate form: the ld3lane intrinsic is called on <16 x i8> operands %B, %C, %D with lane index 0.
		; %A is advanced by 3 bytes and stored to %ptr; the writeback is expected as the immediate #3.
		define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_ld3lane:
		;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
		%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 3
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
		}

		; ld3 single-lane post-index, register form: the ld3lane intrinsic is called on <16 x i8> operands %B, %C, %D with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_ld3lane:
		;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
		}

		declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly


		; ld3 single-lane post-index, immediate form: the ld3lane intrinsic is called on <8 x i8> operands %B, %C, %D with lane index 0.
		; %A is advanced by 3 bytes and stored to %ptr; the writeback is expected as the immediate #3.
		define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_ld3lane:
		;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
		%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 3
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
		}

		; ld3 single-lane post-index, register form: the ld3lane intrinsic is called on <8 x i8> operands %B, %C, %D with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_ld3lane:
		;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
		}

		declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly


		; ld3 single-lane post-index, immediate form: the ld3lane intrinsic is called on <8 x i16> operands %B, %C, %D with lane index 0.
		; %A is advanced by 3 i16 elements (6 bytes) and stored to %ptr; the writeback is expected as the immediate #6.
		define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_ld3lane:
		;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
		%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 3
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
		}

		; ld3 single-lane post-index, register form: the ld3lane intrinsic is called on <8 x i16> operands %B, %C, %D with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_ld3lane:
		;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
		}

		declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly


		; ld3 single-lane post-index, immediate form: the ld3lane intrinsic is called on <4 x i16> operands %B, %C, %D with lane index 0.
		; %A is advanced by 3 i16 elements (6 bytes) and stored to %ptr; the writeback is expected as the immediate #6.
		define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_ld3lane:
		;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
		%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 3
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
		}

		; ld3 single-lane post-index, register form: the ld3lane intrinsic is called on <4 x i16> operands %B, %C, %D with lane index 0.
		; %A is advanced by %inc elements and the new pointer is stored to %ptr; the writeback is expected as an x-register offset.
		define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_ld3lane:
		;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
		}

		declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly


		; ld3 single-lane post-index, immediate form: the ld3lane intrinsic is called on <4 x i32> operands %B, %C, %D with lane index 0.
		; %A is advanced by 3 i32 elements (12 bytes) and stored to %ptr; the writeback is expected as the immediate #12.
		define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_ld3lane:
		;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
		%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 3
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
		}

		; ld3lane (lane index 0) on three <4 x i32> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_ld3lane:
		;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
		}

		declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly


		; ld3lane (lane index 0) on three <2 x i32> vectors; the pointer advanced by 3 x i32 (12 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_ld3lane:
		;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
		%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 3
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
		}

		; ld3lane (lane index 0) on three <2 x i32> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_ld3lane:
		;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
		}

		declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly


		; ld3lane (lane index 0) on three <2 x i64> vectors; the pointer advanced by 3 x i64 (24 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_ld3lane:
		;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
		%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i32 3
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
		}

		; ld3lane (lane index 0) on three <2 x i64> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_ld3lane:
		;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
		}

		declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly


		; ld3lane (lane index 0) on three <1 x i64> vectors; the pointer advanced by 3 x i64 (24 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_ld3lane:
		;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
		%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i32 3
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
		}

		; ld3lane (lane index 0) on three <1 x i64> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_ld3lane:
		;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
		}

		declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly


		; ld3lane (lane index 0) on three <4 x float> vectors; the pointer advanced by 3 x float (12 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_ld3lane:
		;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
		%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 3
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float> } %ld3
		}

		; ld3lane (lane index 0) on three <4 x float> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_ld3lane:
		;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float> } %ld3
		}

		declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly


		; ld3lane (lane index 0) on three <2 x float> vectors; the pointer advanced by 3 x float (12 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_ld3lane:
		;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
		%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 3
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float> } %ld3
		}

		; ld3lane (lane index 0) on three <2 x float> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_ld3lane:
		;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float> } %ld3
		}

		declare { <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly


		; ld3lane (lane index 0) on three <2 x double> vectors; the pointer advanced by 3 x double (24 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_ld3lane:
		;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
		%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
		%tmp = getelementptr double* %A, i32 3
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double> } %ld3
		}

		; ld3lane (lane index 0) on three <2 x double> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_ld3lane:
		;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double> } %ld3
		}

		declare { <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly


		; ld3lane (lane index 0) on three <1 x double> vectors; the pointer advanced by 3 x double (24 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_ld3lane:
		;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
		%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
		%tmp = getelementptr double* %A, i32 3
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double> } %ld3
		}

		; ld3lane (lane index 0) on three <1 x double> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_ld3lane:
		;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		%ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double> } %ld3
		}

		declare { <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly


		; ld4lane (lane index 0) on four <16 x i8> vectors; the pointer advanced by 4 x i8 (4 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_ld4lane:
		;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
		%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 4
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
		}

		; ld4lane (lane index 0) on four <16 x i8> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_ld4lane:
		;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
		}

		declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly


		; ld4lane (lane index 0) on four <8 x i8> vectors; the pointer advanced by 4 x i8 (4 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_ld4lane:
		;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
		%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 4
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
		}

		; ld4lane (lane index 0) on four <8 x i8> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_ld4lane:
		;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		store i8* %tmp, i8** %ptr
		ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
		}

		declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly


		; ld4lane (lane index 0) on four <8 x i16> vectors; the pointer advanced by 4 x i16 (8 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_ld4lane:
		;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
		%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 4
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
		}

		; ld4lane (lane index 0) on four <8 x i16> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_ld4lane:
		;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
		}

		declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly


		; ld4lane (lane index 0) on four <4 x i16> vectors; the pointer advanced by 4 x i16 (8 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_ld4lane:
		;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
		%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 4
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
		}

		; ld4lane (lane index 0) on four <4 x i16> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_ld4lane:
		;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		store i16* %tmp, i16** %ptr
		ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
		}

		declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.arm64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly


		; ld4lane (lane index 0) on four <4 x i32> vectors; the pointer advanced by 4 x i32 (16 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_ld4lane:
		;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
		%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 4
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
		}

		; ld4lane (lane index 0) on four <4 x i32> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_ld4lane:
		;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
		}

		declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly


		; ld4lane (lane index 0) on four <2 x i32> vectors; the pointer advanced by 4 x i32 (16 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_ld4lane:
		;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
		%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 4
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
		}

		; ld4lane (lane index 0) on four <2 x i32> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_ld4lane:
		;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		store i32* %tmp, i32** %ptr
		ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
		}

		declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.arm64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly


		; ld4lane (lane index 0) on four <2 x i64> vectors; the pointer advanced by 4 x i64 (32 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_ld4lane:
		;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
		%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i32 4
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
		}

		; ld4lane (lane index 0) on four <2 x i64> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_ld4lane:
		;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
		}

		declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.arm64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly


		; ld4lane (lane index 0) on four <1 x i64> vectors; the pointer advanced by 4 x i64 (32 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_ld4lane:
		;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
		%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i32 4
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
		}

		; ld4lane (lane index 0) on four <1 x i64> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_ld4lane:
		;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		store i64* %tmp, i64** %ptr
		ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
		}

		declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.arm64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly


		; ld4lane (lane index 0) on four <4 x float> vectors; the pointer advanced by 4 x float (16 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_ld4lane:
		;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
		%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 4
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
		}

		; ld4lane (lane index 0) on four <4 x float> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_ld4lane:
		;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
		}

		declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.arm64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly


		; ld4lane (lane index 0) on four <2 x float> vectors; the pointer advanced by 4 x float (16 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_ld4lane:
		;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
		%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 4
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
		}

		; ld4lane (lane index 0) on four <2 x float> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_ld4lane:
		;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		store float* %tmp, float** %ptr
		ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
		}

		declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.arm64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly


		; ld4lane (lane index 0) on four <2 x double> vectors; the pointer advanced by 4 x double (32 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_ld4lane:
		;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
		%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
		%tmp = getelementptr double* %A, i32 4
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
		}

		; ld4lane (lane index 0) on four <2 x double> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_ld4lane:
		;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
		}

		declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.arm64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly


		; ld4lane (lane index 0) on four <1 x double> vectors; the pointer advanced by 4 x double (32 bytes) is stored to %ptr, so the immediate post-index form is expected.
		define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_ld4lane:
		;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
		%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
		%tmp = getelementptr double* %A, i32 4
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
		}

		; ld4lane (lane index 0) on four <1 x double> vectors; the pointer advanced by %inc elements is stored to %ptr, so the register post-index form (any x register) is expected.
		define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_ld4lane:
		;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		%ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		store double* %tmp, double** %ptr
		ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
		}

		declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.arm64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly


		; st2 of two <16 x i8> vectors; returns the pointer advanced by 32 x i8 (32 bytes), so the immediate post-index form is expected. %ptr is not used in the body.
		define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_st2:
		;CHECK: st2.16b { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
		%tmp = getelementptr i8* %A, i32 32
		ret i8* %tmp
		}

		; st2 of two <16 x i8> vectors; returns the pointer advanced by %inc elements, so the register post-index form (any x register) is expected. %ptr is not used in the body.
		define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_st2:
		;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		declare void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)


		; st2 of two <8 x i8> vectors; returns the pointer advanced by 16 x i8 (16 bytes), so the immediate post-index form is expected. %ptr is not used in the body.
		define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_st2:
		;CHECK: st2.8b { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
		%tmp = getelementptr i8* %A, i32 16
		ret i8* %tmp
		}

		; st2 of two <8 x i8> vectors; returns the pointer advanced by %inc elements, so the register post-index form (any x register) is expected. %ptr is not used in the body.
		define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_st2:
		;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		declare void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)


		; st2 of two <8 x i16> vectors; returns the pointer advanced by 16 x i16 (32 bytes), so the immediate post-index form is expected. %ptr is not used in the body.
		define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_st2:
		;CHECK: st2.8h { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
		%tmp = getelementptr i16* %A, i32 16
		ret i16* %tmp
		}

		; st2 of two <8 x i16> vectors; returns the pointer advanced by %inc elements, so the register post-index form (any x register) is expected. %ptr is not used in the body.
		define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_st2:
		;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		declare void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)


		; st2 of two <4 x i16> vectors; returns the pointer advanced by 8 x i16 (16 bytes), so the immediate post-index form is expected. %ptr is not used in the body.
		define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_st2:
		;CHECK: st2.4h { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
		%tmp = getelementptr i16* %A, i32 8
		ret i16* %tmp
		}

		; st2 of two <4 x i16> vectors; returns the pointer advanced by %inc elements, so the register post-index form (any x register) is expected. %ptr is not used in the body.
		define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_st2:
		;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		declare void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)


		; st2 of two <4 x i32> vectors; returns the pointer advanced by 8 x i32 (32 bytes), so the immediate post-index form is expected. %ptr is not used in the body.
		define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_st2:
		;CHECK: st2.4s { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
		%tmp = getelementptr i32* %A, i32 8
		ret i32* %tmp
		}

		; st2 of two <4 x i32> vectors; returns the pointer advanced by %inc elements, so the register post-index form (any x register) is expected. %ptr is not used in the body.
		define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_st2:
		;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		declare void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)


		; st2 of two <2 x i32> vectors; returns the pointer advanced by 4 x i32 (16 bytes), so the immediate post-index form is expected. %ptr is not used in the body.
		define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_st2:
		;CHECK: st2.2s { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
		%tmp = getelementptr i32* %A, i32 4
		ret i32* %tmp
		}

		; st2 of two <2 x i32> vectors; returns the pointer advanced by %inc elements, so the register post-index form (any x register) is expected. %ptr is not used in the body.
		define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_st2:
		;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		declare void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)


		; st2 of two <2 x i64> vectors; returns the pointer advanced by 4 x i64 (32 bytes), so the immediate post-index form is expected. %ptr is not used in the body.
		define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_st2:
		;CHECK: st2.2d { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
		%tmp = getelementptr i64* %A, i64 4
		ret i64* %tmp
		}

		; st2 of two <2 x i64> vectors; returns the pointer advanced by %inc elements, so the register post-index form (any x register) is expected. %ptr is not used in the body.
		define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_st2:
		;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		declare void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)


		; st2 intrinsic on two <1 x i64> vectors; returns the pointer advanced by 2 x i64 (16 bytes). The expected instruction is st1.1d with a two-register list and an immediate post-index, not an st2 mnemonic. %ptr is not used in the body.
		define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_st2:
		;CHECK: st1.1d { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
		%tmp = getelementptr i64* %A, i64 2
		ret i64* %tmp
		}

		; st2 intrinsic on two <1 x i64> vectors; returns the pointer advanced by %inc elements. The expected instruction is st1.1d with a two-register list and a register post-index (any x register), not an st2 mnemonic. %ptr is not used in the body.
		define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_st2:
		;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		declare void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)


		; st2 of two <4 x float> vectors; returns the pointer advanced by 8 x float (32 bytes), so the immediate post-index form is expected. %ptr is not used in the body.
		define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_st2:
		;CHECK: st2.4s { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
		%tmp = getelementptr float* %A, i32 8
		ret float* %tmp
		}

		; st2 of two <4 x float> vectors, then return %A + %inc float elements: register post-index form.
		define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_st2:
		;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
		%postinc = getelementptr float* %A, i64 %inc
		ret float* %postinc
		}

		declare void @llvm.arm64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*)


		; st2 of two <2 x float> vectors, then return %A + 4 float elements (16 bytes, the amount stored): immediate post-index form.
		define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_st2:
		;CHECK: st2.2s { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
		%postinc = getelementptr float* %A, i32 4
		ret float* %postinc
		}

		; st2 of two <2 x float> vectors, then return %A + %inc float elements: register post-index form.
		define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_st2:
		;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
		%postinc = getelementptr float* %A, i64 %inc
		ret float* %postinc
		}

		declare void @llvm.arm64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*)


		; st2 of two <2 x double> vectors, then return %A + 4 double elements (32 bytes, the amount stored): immediate post-index form.
		define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_st2:
		;CHECK: st2.2d { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
		%postinc = getelementptr double* %A, i64 4
		ret double* %postinc
		}

		; st2 of two <2 x double> vectors, then return %A + %inc double elements: register post-index form.
		define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_st2:
		;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
		%postinc = getelementptr double* %A, i64 %inc
		ret double* %postinc
		}

		declare void @llvm.arm64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*)


		; st2 of two <1 x double> vectors (expected as st1.1d), then return %A + 2 double elements (16 bytes, the amount stored): immediate post-index form.
		define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_st2:
		;CHECK: st1.1d { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
		%postinc = getelementptr double* %A, i64 2
		ret double* %postinc
		}

		; st2 of two <1 x double> vectors (expected as st1.1d), then return %A + %inc double elements: register post-index form.
		define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_st2:
		;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
		%postinc = getelementptr double* %A, i64 %inc
		ret double* %postinc
		}

		declare void @llvm.arm64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*)


		; st3 of three <16 x i8> vectors, then return %A + 48 i8 elements (48 bytes, the amount stored): immediate post-index form.
		define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_st3:
		;CHECK: st3.16b { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
		%postinc = getelementptr i8* %A, i32 48
		ret i8* %postinc
		}

		; st3 of three <16 x i8> vectors, then return %A + %inc i8 elements: register post-index form.
		define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_st3:
		;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
		%postinc = getelementptr i8* %A, i64 %inc
		ret i8* %postinc
		}

		declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)


		; st3 of three <8 x i8> vectors, then return %A + 24 i8 elements (24 bytes, the amount stored): immediate post-index form.
		define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_st3:
		;CHECK: st3.8b { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
		%postinc = getelementptr i8* %A, i32 24
		ret i8* %postinc
		}

		; st3 of three <8 x i8> vectors, then return %A + %inc i8 elements: register post-index form.
		define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_st3:
		;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
		%postinc = getelementptr i8* %A, i64 %inc
		ret i8* %postinc
		}

		declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)


		; st3 of three <8 x i16> vectors, then return %A + 24 i16 elements (48 bytes, the amount stored): immediate post-index form.
		define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_st3:
		;CHECK: st3.8h { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
		%postinc = getelementptr i16* %A, i32 24
		ret i16* %postinc
		}

		; st3 of three <8 x i16> vectors, then return %A + %inc i16 elements: register post-index form.
		define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_st3:
		;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
		%postinc = getelementptr i16* %A, i64 %inc
		ret i16* %postinc
		}

		declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)


		; st3 of three <4 x i16> vectors, then return %A + 12 i16 elements (24 bytes, the amount stored): immediate post-index form.
		define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_st3:
		;CHECK: st3.4h { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
		%postinc = getelementptr i16* %A, i32 12
		ret i16* %postinc
		}

		; st3 of three <4 x i16> vectors, then return %A + %inc i16 elements: register post-index form.
		define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_st3:
		;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
		%postinc = getelementptr i16* %A, i64 %inc
		ret i16* %postinc
		}

		declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)


		; st3 of three <4 x i32> vectors, then return %A + 12 i32 elements (48 bytes, the amount stored): immediate post-index form.
		define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_st3:
		;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
		%postinc = getelementptr i32* %A, i32 12
		ret i32* %postinc
		}

		; st3 of three <4 x i32> vectors, then return %A + %inc i32 elements: register post-index form.
		define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_st3:
		;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
		%postinc = getelementptr i32* %A, i64 %inc
		ret i32* %postinc
		}

		declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)


		; st3 of three <2 x i32> vectors, then return %A + 6 i32 elements (24 bytes, the amount stored): immediate post-index form.
		define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_st3:
		;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
		%postinc = getelementptr i32* %A, i32 6
		ret i32* %postinc
		}

		; st3 of three <2 x i32> vectors, then return %A + %inc i32 elements: register post-index form.
		define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_st3:
		;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
		%postinc = getelementptr i32* %A, i64 %inc
		ret i32* %postinc
		}

		declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)


		; st3 of three <2 x i64> vectors, then return %A + 6 i64 elements (48 bytes, the amount stored): immediate post-index form.
		define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_st3:
		;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
		%postinc = getelementptr i64* %A, i64 6
		ret i64* %postinc
		}

		; st3 of three <2 x i64> vectors, then return %A + %inc i64 elements: register post-index form.
		define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_st3:
		;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
		%postinc = getelementptr i64* %A, i64 %inc
		ret i64* %postinc
		}

		declare void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)


		; st3 of three <1 x i64> vectors (expected as st1.1d), then return %A + 3 i64 elements (24 bytes, the amount stored): immediate post-index form.
		define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_st3:
		;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
		%postinc = getelementptr i64* %A, i64 3
		ret i64* %postinc
		}

		; st3 of three <1 x i64> vectors (expected as st1.1d), then return %A + %inc i64 elements: register post-index form.
		define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_st3:
		;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
		%postinc = getelementptr i64* %A, i64 %inc
		ret i64* %postinc
		}

		declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)


		; st3 of three <4 x float> vectors, then return %A + 12 float elements (48 bytes, the amount stored): immediate post-index form.
		define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_st3:
		;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
		%postinc = getelementptr float* %A, i32 12
		ret float* %postinc
		}

		; st3 of three <4 x float> vectors, then return %A + %inc float elements: register post-index form.
		define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_st3:
		;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
		%postinc = getelementptr float* %A, i64 %inc
		ret float* %postinc
		}

		declare void @llvm.arm64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)


		; st3 of three <2 x float> vectors, then return %A + 6 float elements (24 bytes, the amount stored): immediate post-index form.
		define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_st3:
		;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
		%postinc = getelementptr float* %A, i32 6
		ret float* %postinc
		}

		; st3 of three <2 x float> vectors, then return %A + %inc float elements: register post-index form.
		define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_st3:
		;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
		%postinc = getelementptr float* %A, i64 %inc
		ret float* %postinc
		}

		declare void @llvm.arm64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)


		; st3 of three <2 x double> vectors, then return %A + 6 double elements (48 bytes, the amount stored): immediate post-index form.
		define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_st3:
		;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
		%postinc = getelementptr double* %A, i64 6
		ret double* %postinc
		}

		; st3 of three <2 x double> vectors, then return %A + %inc double elements: register post-index form.
		define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_st3:
		;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
		%postinc = getelementptr double* %A, i64 %inc
		ret double* %postinc
		}

		declare void @llvm.arm64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)


		; st3 of three <1 x double> vectors (expected as st1.1d), then return %A + 3 double elements (24 bytes, the amount stored): immediate post-index form.
		define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_st3:
		;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
		%postinc = getelementptr double* %A, i64 3
		ret double* %postinc
		}

		; st3 of three <1 x double> vectors (expected as st1.1d), then return %A + %inc double elements: register post-index form.
		define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_st3:
		;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
		%postinc = getelementptr double* %A, i64 %inc
		ret double* %postinc
		}

		declare void @llvm.arm64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)


		; st4 of four <16 x i8> vectors, then return %A + 64 i8 elements (64 bytes, the amount stored): immediate post-index form.
		define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_st4:
		;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
		%postinc = getelementptr i8* %A, i32 64
		ret i8* %postinc
		}

		; st4 of four <16 x i8> vectors, then return %A + %inc i8 elements: register post-index form.
		define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_st4:
		;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
		%postinc = getelementptr i8* %A, i64 %inc
		ret i8* %postinc
		}

		declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)


		; st4 of four <8 x i8> vectors, then return %A + 32 i8 elements (32 bytes, the amount stored): immediate post-index form.
		define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_st4:
		;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
		%postinc = getelementptr i8* %A, i32 32
		ret i8* %postinc
		}

		; st4 of four <8 x i8> vectors, then return %A + %inc i8 elements: register post-index form.
		define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_st4:
		;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
		%postinc = getelementptr i8* %A, i64 %inc
		ret i8* %postinc
		}

		declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)


		; st4 of four <8 x i16> vectors, then return %A + 32 i16 elements (64 bytes, the amount stored): immediate post-index form.
		define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_st4:
		;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
		%postinc = getelementptr i16* %A, i32 32
		ret i16* %postinc
		}

		; st4 of four <8 x i16> vectors, then return %A + %inc i16 elements: register post-index form.
		define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_st4:
		;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
		%postinc = getelementptr i16* %A, i64 %inc
		ret i16* %postinc
		}

		declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)


		; st4 of four <4 x i16> vectors, then return %A + 16 i16 elements (32 bytes, the amount stored): immediate post-index form.
		define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_st4:
		;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
		%postinc = getelementptr i16* %A, i32 16
		ret i16* %postinc
		}

		; st4 of four <4 x i16> vectors, then return %A + %inc i16 elements: register post-index form.
		define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_st4:
		;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
		%postinc = getelementptr i16* %A, i64 %inc
		ret i16* %postinc
		}

		; External declaration of the st4 intrinsic taking four <4 x i16> vectors and an i16* pointer operand.
		declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*)


		; st4 of four <4 x i32> vectors, then return %A + 16 i32 elements (64 bytes, the amount stored): immediate post-index form.
		define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_st4:
		;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
		%postinc = getelementptr i32* %A, i32 16
		ret i32* %postinc
		}

		; st4 of four <4 x i32> vectors, then return %A + %inc i32 elements: register post-index form.
		define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_st4:
		;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
		%postinc = getelementptr i32* %A, i64 %inc
		ret i32* %postinc
		}

		; External declaration of the st4 intrinsic taking four <4 x i32> vectors and an i32* pointer operand.
		declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*)


		; st4 of four <2 x i32> vectors, then return %A + 8 i32 elements (32 bytes, the amount stored): immediate post-index form.
		define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_st4:
		;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
		%postinc = getelementptr i32* %A, i32 8
		ret i32* %postinc
		}

		; st4 of four <2 x i32> vectors, then return %A + %inc i32 elements: register post-index form.
		define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_st4:
		;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
		%postinc = getelementptr i32* %A, i64 %inc
		ret i32* %postinc
		}

		declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)


		; st4 of four <2 x i64> vectors, then return %A + 8 i64 elements (64 bytes, the amount stored): immediate post-index form.
		define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_st4:
		;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
		%postinc = getelementptr i64* %A, i64 8
		ret i64* %postinc
		}

		; st4 of four <2 x i64> vectors, then return %A + %inc i64 elements: register post-index form.
		define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_st4:
		;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
		%postinc = getelementptr i64* %A, i64 %inc
		ret i64* %postinc
		}

		; External declaration of the st4 intrinsic taking four <2 x i64> vectors and an i64* pointer operand.
		declare void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*)


		; st4 of four <1 x i64> vectors (expected as st1.1d), then return %A + 4 i64 elements (32 bytes, the amount stored): immediate post-index form.
		define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_st4:
		;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
		%postinc = getelementptr i64* %A, i64 4
		ret i64* %postinc
		}

		; st4 of four <1 x i64> vectors (expected as st1.1d), then return %A + %inc i64 elements: register post-index form.
		define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_st4:
		;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
		%postinc = getelementptr i64* %A, i64 %inc
		ret i64* %postinc
		}

		; External declaration of the st4 intrinsic taking four <1 x i64> vectors and an i64* pointer operand.
		declare void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*)


		; st4 of four <4 x float> vectors, then return %A + 16 float elements (64 bytes, the amount stored): immediate post-index form.
		define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_st4:
		;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
		%postinc = getelementptr float* %A, i32 16
		ret float* %postinc
		}

		; st4 of four <4 x float> vectors, then return %A + %inc float elements: register post-index form.
		define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_st4:
		;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
		%postinc = getelementptr float* %A, i64 %inc
		ret float* %postinc
		}

		declare void @llvm.arm64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)


		; st4 of four <2 x float> vectors, then return %A + 8 float elements (32 bytes, the amount stored): immediate post-index form.
		define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_st4:
		;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
		%postinc = getelementptr float* %A, i32 8
		ret float* %postinc
		}

		; st4 of four <2 x float> vectors, then return %A + %inc float elements: register post-index form.
		define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_st4:
		;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
		%postinc = getelementptr float* %A, i64 %inc
		ret float* %postinc
		}

		declare void @llvm.arm64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)


		; st4 of four <2 x double> vectors, then return %A + 8 double elements (64 bytes, the amount stored): immediate post-index form.
		define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_st4:
		;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
		%postinc = getelementptr double* %A, i64 8
		ret double* %postinc
		}

		; st4 of four <2 x double> vectors, then return %A + %inc double elements: register post-index form.
		define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_st4:
		;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
		%postinc = getelementptr double* %A, i64 %inc
		ret double* %postinc
		}

		; External declaration of the st4 intrinsic taking four <2 x double> vectors and a double* pointer operand.
		declare void @llvm.arm64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*)


		; st4 of four <1 x double> vectors (expected as st1.1d), then return %A + 4 double elements (32 bytes, the amount stored): immediate post-index form.
		define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_st4:
		;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
		%postinc = getelementptr double* %A, i64 4
		ret double* %postinc
		}

		; st4 of four <1 x double> vectors (expected as st1.1d), then return %A + %inc double elements: register post-index form.
		define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_st4:
		;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
		%postinc = getelementptr double* %A, i64 %inc
		ret double* %postinc
		}

		declare void @llvm.arm64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)


		; st1x2 of two <16 x i8> vectors, then return %A + 32 i8 elements (32 bytes, the amount stored): immediate post-index form.
		define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_st1x2:
		;CHECK: st1.16b { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
		%postinc = getelementptr i8* %A, i32 32
		ret i8* %postinc
		}

		; st1x2 of two <16 x i8> vectors, then return %A + %inc i8 elements: register post-index form.
		define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_st1x2:
		;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
		%postinc = getelementptr i8* %A, i64 %inc
		ret i8* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)


		; st1x2 of two <8 x i8> vectors, then return %A + 16 i8 elements (16 bytes, the amount stored): immediate post-index form.
		define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_st1x2:
		;CHECK: st1.8b { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
		%postinc = getelementptr i8* %A, i32 16
		ret i8* %postinc
		}

		; st1x2 of two <8 x i8> vectors, then return %A + %inc i8 elements: register post-index form.
		define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_st1x2:
		;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
		%postinc = getelementptr i8* %A, i64 %inc
		ret i8* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)


		; st1x2 of two <8 x i16> vectors, then return %A + 16 i16 elements (32 bytes, the amount stored): immediate post-index form.
		define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_st1x2:
		;CHECK: st1.8h { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
		%postinc = getelementptr i16* %A, i32 16
		ret i16* %postinc
		}

		; st1x2 of two <8 x i16> vectors, then return %A + %inc i16 elements: register post-index form.
		define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_st1x2:
		;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
		%postinc = getelementptr i16* %A, i64 %inc
		ret i16* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)


		; st1x2 of two <4 x i16> vectors, then return %A + 8 i16 elements (16 bytes, the amount stored): immediate post-index form.
		define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_st1x2:
		;CHECK: st1.4h { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
		%postinc = getelementptr i16* %A, i32 8
		ret i16* %postinc
		}

		; st1x2 of two <4 x i16> vectors, then return %A + %inc i16 elements: register post-index form.
		define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_st1x2:
		;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
		%postinc = getelementptr i16* %A, i64 %inc
		ret i16* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)


		; st1x2 of two <4 x i32> vectors, then return %A + 8 i32 elements (32 bytes, the amount stored): immediate post-index form.
		define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_st1x2:
		;CHECK: st1.4s { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
		%postinc = getelementptr i32* %A, i32 8
		ret i32* %postinc
		}

		; st1x2 of two <4 x i32> vectors, then return %A + %inc i32 elements: register post-index form.
		define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_st1x2:
		;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
		%postinc = getelementptr i32* %A, i64 %inc
		ret i32* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)


		; st1x2 of two <2 x i32> vectors, then return %A + 4 i32 elements (16 bytes, the amount stored): immediate post-index form.
		define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_st1x2:
		;CHECK: st1.2s { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
		%postinc = getelementptr i32* %A, i32 4
		ret i32* %postinc
		}

		; st1x2 of two <2 x i32> vectors, then return %A + %inc i32 elements: register post-index form.
		define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_st1x2:
		;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
		%postinc = getelementptr i32* %A, i64 %inc
		ret i32* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)


		; st1x2 of two <2 x i64> vectors, then return %A + 4 i64 elements (32 bytes, the amount stored): immediate post-index form.
		define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_st1x2:
		;CHECK: st1.2d { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
		%postinc = getelementptr i64* %A, i64 4
		ret i64* %postinc
		}

		; st1x2 of two <2 x i64> vectors, then return %A + %inc i64 elements: register post-index form.
		define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_st1x2:
		;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
		%postinc = getelementptr i64* %A, i64 %inc
		ret i64* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)


		; st1x2 of two <1 x i64> vectors, then return %A + 2 i64 elements (16 bytes, the amount stored): immediate post-index form.
		define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_st1x2:
		;CHECK: st1.1d { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
		%postinc = getelementptr i64* %A, i64 2
		ret i64* %postinc
		}

		; st1x2 of two <1 x i64> vectors, then return %A + %inc i64 elements: register post-index form.
		define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_st1x2:
		;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
		%postinc = getelementptr i64* %A, i64 %inc
		ret i64* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)


		; st1x2 of two <4 x float> vectors, then return %A + 8 float elements (32 bytes, the amount stored): immediate post-index form.
		define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_st1x2:
		;CHECK: st1.4s { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
		%postinc = getelementptr float* %A, i32 8
		ret float* %postinc
		}

		; st1x2 of two <4 x float> vectors, then return %A + %inc float elements: register post-index form.
		define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_st1x2:
		;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
		%postinc = getelementptr float* %A, i64 %inc
		ret float* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*)


		; st1x2 of two <2 x float> vectors, then return %A + 4 float elements (16 bytes, the amount stored): immediate post-index form.
		define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_st1x2:
		;CHECK: st1.2s { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
		%postinc = getelementptr float* %A, i32 4
		ret float* %postinc
		}

		; st1x2 of two <2 x float> vectors, then return %A + %inc float elements: register post-index form.
		define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_st1x2:
		;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
		%postinc = getelementptr float* %A, i64 %inc
		ret float* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*)


		; st1x2 of two <2 x double> vectors, then return %A + 4 double elements (32 bytes, the amount stored): immediate post-index form.
		define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_st1x2:
		;CHECK: st1.2d { v0, v1 }, [x0], #32
		call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
		%postinc = getelementptr double* %A, i64 4
		ret double* %postinc
		}

		; st1x2 of two <2 x double> vectors, then return %A + %inc double elements: register post-index form.
		define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_st1x2:
		;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
		%postinc = getelementptr double* %A, i64 %inc
		ret double* %postinc
		}

		declare void @llvm.arm64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*)


		; st1x2 on <1 x double>: the store through %A followed by a constant advance
		; of 2 elements (16 bytes) should fold into one post-indexed store
		; (immediate form).
		define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_st1x2:
		;CHECK: st1.1d { v0, v1 }, [x0], #16
		call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
		%tmp = getelementptr double* %A, i64 2
		ret double* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_st1x2:
		;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*)


		; st1x3 on <16 x i8>: the store through %A followed by a constant advance
		; of 48 elements (48 bytes) should fold into one post-indexed store
		; (immediate form).
		define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_st1x3:
		;CHECK: st1.16b { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
		%tmp = getelementptr i8* %A, i32 48
		ret i8* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_st1x3:
		;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)


		; st1x3 on <8 x i8>: the store through %A followed by a constant advance
		; of 24 elements (24 bytes) should fold into one post-indexed store
		; (immediate form).
		define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_st1x3:
		;CHECK: st1.8b { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
		%tmp = getelementptr i8* %A, i32 24
		ret i8* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_st1x3:
		;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)


		; st1x3 on <8 x i16>: the store through %A followed by a constant advance
		; of 24 elements (48 bytes) should fold into one post-indexed store
		; (immediate form).
		define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_st1x3:
		;CHECK: st1.8h { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
		%tmp = getelementptr i16* %A, i32 24
		ret i16* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_st1x3:
		;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)


		; st1x3 on <4 x i16>: the store through %A followed by a constant advance
		; of 12 elements (24 bytes) should fold into one post-indexed store
		; (immediate form).
		define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_st1x3:
		;CHECK: st1.4h { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
		%tmp = getelementptr i16* %A, i32 12
		ret i16* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_st1x3:
		;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)


		; st1x3 on <4 x i32>: the store through %A followed by a constant advance
		; of 12 elements (48 bytes) should fold into one post-indexed store
		; (immediate form).
		define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_st1x3:
		;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
		%tmp = getelementptr i32* %A, i32 12
		ret i32* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_st1x3:
		;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)


		; st1x3 on <2 x i32>: the store through %A followed by a constant advance
		; of 6 elements (24 bytes) should fold into one post-indexed store
		; (immediate form).
		define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_st1x3:
		;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
		%tmp = getelementptr i32* %A, i32 6
		ret i32* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_st1x3:
		;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)


		; st1x3 on <2 x i64>: the store through %A followed by a constant advance
		; of 6 elements (48 bytes) should fold into one post-indexed store
		; (immediate form).
		define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_st1x3:
		;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
		%tmp = getelementptr i64* %A, i64 6
		ret i64* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_st1x3:
		;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)


		; st1x3 on <1 x i64>: the store through %A followed by a constant advance
		; of 3 elements (24 bytes) should fold into one post-indexed store
		; (immediate form).
		define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_st1x3:
		;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
		%tmp = getelementptr i64* %A, i64 3
		ret i64* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_st1x3:
		;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)


		; st1x3 on <4 x float>: the store through %A followed by a constant advance
		; of 12 elements (48 bytes) should fold into one post-indexed store
		; (immediate form).
		define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_st1x3:
		;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
		%tmp = getelementptr float* %A, i32 12
		ret float* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_st1x3:
		;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)


		; st1x3 on <2 x float>: the store through %A followed by a constant advance
		; of 6 elements (24 bytes) should fold into one post-indexed store
		; (immediate form).
		define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_st1x3:
		;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
		%tmp = getelementptr float* %A, i32 6
		ret float* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_st1x3:
		;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)


		; st1x3 on <2 x double>: the store through %A followed by a constant advance
		; of 6 elements (48 bytes) should fold into one post-indexed store
		; (immediate form).
		define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_st1x3:
		;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
		call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
		%tmp = getelementptr double* %A, i64 6
		ret double* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_st1x3:
		;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)


		; st1x3 on <1 x double>: the store through %A followed by a constant advance
		; of 3 elements (24 bytes) should fold into one post-indexed store
		; (immediate form).
		define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_st1x3:
		;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
		call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
		%tmp = getelementptr double* %A, i64 3
		ret double* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_st1x3:
		;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)


		; st1x4 on <16 x i8>: the store through %A followed by a constant advance
		; of 64 elements (64 bytes) should fold into one post-indexed store
		; (immediate form).
		define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_st1x4:
		;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
		%tmp = getelementptr i8* %A, i32 64
		ret i8* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_st1x4:
		;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)


		; st1x4 on <8 x i8>: the store through %A followed by a constant advance
		; of 32 elements (32 bytes) should fold into one post-indexed store
		; (immediate form).
		define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_st1x4:
		;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
		%tmp = getelementptr i8* %A, i32 32
		ret i8* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_st1x4:
		;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)


		; st1x4 on <8 x i16>: the store through %A followed by a constant advance
		; of 32 elements (64 bytes) should fold into one post-indexed store
		; (immediate form).
		define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_st1x4:
		;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
		%tmp = getelementptr i16* %A, i32 32
		ret i16* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_st1x4:
		;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)


		; st1x4 on <4 x i16>: the store through %A followed by a constant advance
		; of 16 elements (32 bytes) should fold into one post-indexed store
		; (immediate form).
		define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_st1x4:
		;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
		%tmp = getelementptr i16* %A, i32 16
		ret i16* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_st1x4:
		;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*)


		; st1x4 on <4 x i32>: the store through %A followed by a constant advance
		; of 16 elements (64 bytes) should fold into one post-indexed store
		; (immediate form).
		define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_st1x4:
		;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
		%tmp = getelementptr i32* %A, i32 16
		ret i32* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_st1x4:
		;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*)


		; st1x4 on <2 x i32>: the store through %A followed by a constant advance
		; of 8 elements (32 bytes) should fold into one post-indexed store
		; (immediate form).
		define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_st1x4:
		;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
		%tmp = getelementptr i32* %A, i32 8
		ret i32* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_st1x4:
		;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)


		; st1x4 on <2 x i64>: the store through %A followed by a constant advance
		; of 8 elements (64 bytes) should fold into one post-indexed store
		; (immediate form).
		define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_st1x4:
		;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
		%tmp = getelementptr i64* %A, i64 8
		ret i64* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_st1x4:
		;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*)


		; st1x4 on <1 x i64>: the store through %A followed by a constant advance
		; of 4 elements (32 bytes) should fold into one post-indexed store
		; (immediate form).
		define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_st1x4:
		;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
		%tmp = getelementptr i64* %A, i64 4
		ret i64* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_st1x4:
		;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*)


		; st1x4 on <4 x float>: the store through %A followed by a constant advance
		; of 16 elements (64 bytes) should fold into one post-indexed store
		; (immediate form).
		define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_st1x4:
		;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
		%tmp = getelementptr float* %A, i32 16
		ret float* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_st1x4:
		;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)


		; st1x4 on <2 x float>: the store through %A followed by a constant advance
		; of 8 elements (32 bytes) should fold into one post-indexed store
		; (immediate form).
		define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_st1x4:
		;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
		%tmp = getelementptr float* %A, i32 8
		ret float* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_st1x4:
		;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)


		; st1x4 on <2 x double>: the store through %A followed by a constant advance
		; of 8 elements (64 bytes) should fold into one post-indexed store
		; (immediate form).
		define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_st1x4:
		;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
		call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
		%tmp = getelementptr double* %A, i64 8
		ret double* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_st1x4:
		;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*)


		; st1x4 on <1 x double>: the store through %A followed by a constant advance
		; of 4 elements (32 bytes) should fold into one post-indexed store
		; (immediate form).
		define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_st1x4:
		;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
		call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
		%tmp = getelementptr double* %A, i64 4
		ret double* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_st1x4:
		;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)


		; Calls "st2lanelane" with two i64 lane operands (0 and 1) and then advances
		; %A by a constant 2 elements.
		; NOTE(review): unlike the neighbouring tests, this function and the next one
		; carry no FileCheck directives and no nounwind attribute, so nothing about
		; their generated code is verified. "st2lanelane" is also not among the
		; instructions listed in the patch summary. Presumably this only checks that
		; a call the backend does not recognise compiles without being turned into a
		; post-indexed store - TODO confirm the intent with the author.
		define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) {
		call void @llvm.arm64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
		%tmp = getelementptr i8* %A, i32 2
		ret i8* %tmp
		}

		; Same call with the pointer advanced by %inc elements; also unverified.
		define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) {
		call void @llvm.arm64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		; NOTE(review): declared readnone although it returns void and takes a
		; pointer operand like the store intrinsics around it - confirm this
		; attribute is intended.
		declare void @llvm.arm64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone


		; st2lane on <16 x i8>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (2 bytes) should fold into one post-indexed store
		; (immediate form).
		define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_st2lane:
		;CHECK: st2.b { v0, v1 }[0], [x0], #2
		call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 2
		ret i8* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_st2lane:
		;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*)


		; st2lane on <8 x i8>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (2 bytes) should fold into one post-indexed store
		; (immediate form).
		define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_st2lane:
		;CHECK: st2.b { v0, v1 }[0], [x0], #2
		call void @llvm.arm64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 2
		ret i8* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_st2lane:
		;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*)


		; st2lane on <8 x i16>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (4 bytes) should fold into one post-indexed store
		; (immediate form).
		define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_st2lane:
		;CHECK: st2.h { v0, v1 }[0], [x0], #4
		call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 2
		ret i16* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_st2lane:
		;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*)


		; st2lane on <4 x i16>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (4 bytes) should fold into one post-indexed store
		; (immediate form).
		define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_st2lane:
		;CHECK: st2.h { v0, v1 }[0], [x0], #4
		call void @llvm.arm64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 2
		ret i16* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_st2lane:
		;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*)


		; st2lane on <4 x i32>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (8 bytes) should fold into one post-indexed store
		; (immediate form).
		define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_st2lane:
		;CHECK: st2.s { v0, v1 }[0], [x0], #8
		call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 2
		ret i32* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_st2lane:
		;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)


		; st2lane on <2 x i32>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (8 bytes) should fold into one post-indexed store
		; (immediate form).
		define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_st2lane:
		;CHECK: st2.s { v0, v1 }[0], [x0], #8
		call void @llvm.arm64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 2
		ret i32* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_st2lane:
		;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*)


		; st2lane on <2 x i64>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (16 bytes) should fold into one post-indexed store
		; (immediate form).
		define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_st2lane:
		;CHECK: st2.d { v0, v1 }[0], [x0], #16
		call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 2
		ret i64* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_st2lane:
		;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*)


		; st2lane on <1 x i64>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (16 bytes) should fold into one post-indexed store
		; (immediate form).
		define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_st2lane:
		;CHECK: st2.d { v0, v1 }[0], [x0], #16
		call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 2
		ret i64* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_st2lane:
		;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*)


		; st2lane on <4 x float>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (8 bytes) should fold into one post-indexed store
		; (immediate form).
		define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_st2lane:
		;CHECK: st2.s { v0, v1 }[0], [x0], #8
		call void @llvm.arm64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 2
		ret float* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_st2lane:
		;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*)


		; st2lane on <2 x float>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (8 bytes) should fold into one post-indexed store
		; (immediate form).
		define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_st2lane:
		;CHECK: st2.s { v0, v1 }[0], [x0], #8
		call void @llvm.arm64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 2
		ret float* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_st2lane:
		;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*)


		; st2lane on <2 x double>, lane 0: the store through %A followed by a constant
		; advance of 2 elements (16 bytes) should fold into one post-indexed store
		; (immediate form).
		define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_st2lane:
		;CHECK: st2.d { v0, v1 }[0], [x0], #16
		call void @llvm.arm64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 2
		ret double* %tmp
		}

		; Same store with an advance of %inc elements: register post-index form expected.
		define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_st2lane:
		;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		; Intrinsic overload used by the two tests above.
		declare void @llvm.arm64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*)


		; Immediate post-increment case for st2lane on <1 x double>: lane 0 of %B
		; and %C is stored at %A, and the function returns %A advanced by 2 doubles.
		; The expected assembly contains an st2.d with a writeback immediate of #16.
		; %ptr is not used in the body.
		define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_st2lane:
		;CHECK: st2.d { v0, v1 }[0], [x0], #16
		call void @llvm.arm64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 2
		ret double* %tmp
		}

		; Register post-increment case for st2lane on <1 x double>: lane 0 of %B and
		; %C is stored at %A, and the function returns %A advanced by %inc doubles.
		; The expected assembly contains an st2.d whose writeback offset is an x
		; register (any register number). %ptr is not used in the body.
		define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_st2lane:
		;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		declare void @llvm.arm64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*)


		; Immediate post-increment case for st3lane on <16 x i8>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by 3 i8
		; elements. The expected assembly contains an st3.b with a writeback
		; immediate of #3. %ptr is not used in the body.
		define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_st3lane:
		;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
		call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 3
		ret i8* %tmp
		}

		; Register post-increment case for st3lane on <16 x i8>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc i8
		; elements. The expected assembly contains an st3.b whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_st3lane:
		;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)


		; Immediate post-increment case for st3lane on <8 x i8>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by 3 i8
		; elements. The expected assembly contains an st3.b with a writeback
		; immediate of #3. %ptr is not used in the body.
		define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_st3lane:
		;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
		call void @llvm.arm64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 3
		ret i8* %tmp
		}

		; Register post-increment case for st3lane on <8 x i8>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc i8
		; elements. The expected assembly contains an st3.b whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_st3lane:
		;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)


		; Immediate post-increment case for st3lane on <8 x i16>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by 3 i16
		; elements. The expected assembly contains an st3.h with a writeback
		; immediate of #6. %ptr is not used in the body.
		define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_st3lane:
		;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
		call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 3
		ret i16* %tmp
		}

		; Register post-increment case for st3lane on <8 x i16>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc i16
		; elements. The expected assembly contains an st3.h whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_st3lane:
		;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)


		; Immediate post-increment case for st3lane on <4 x i16>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by 3 i16
		; elements. The expected assembly contains an st3.h with a writeback
		; immediate of #6. %ptr is not used in the body.
		define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_st3lane:
		;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
		call void @llvm.arm64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 3
		ret i16* %tmp
		}

		; Register post-increment case for st3lane on <4 x i16>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc i16
		; elements. The expected assembly contains an st3.h whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_st3lane:
		;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)


		; Immediate post-increment case for st3lane on <4 x i32>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by 3 i32
		; elements. The expected assembly contains an st3.s with a writeback
		; immediate of #12. %ptr is not used in the body.
		define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_st3lane:
		;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
		call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 3
		ret i32* %tmp
		}

		; Register post-increment case for st3lane on <4 x i32>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc i32
		; elements. The expected assembly contains an st3.s whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_st3lane:
		;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)


		; Immediate post-increment case for st3lane on <2 x i32>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by 3 i32
		; elements. The expected assembly contains an st3.s with a writeback
		; immediate of #12. %ptr is not used in the body.
		define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_st3lane:
		;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
		call void @llvm.arm64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 3
		ret i32* %tmp
		}

		; Register post-increment case for st3lane on <2 x i32>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc i32
		; elements. The expected assembly contains an st3.s whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_st3lane:
		;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)


		; Immediate post-increment case for st3lane on <2 x i64>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by 3 i64
		; elements. The expected assembly contains an st3.d with a writeback
		; immediate of #24. %ptr is not used in the body.
		define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_st3lane:
		;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
		call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 3
		ret i64* %tmp
		}

		; Register post-increment case for st3lane on <2 x i64>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc i64
		; elements. The expected assembly contains an st3.d whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_st3lane:
		;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)


		; Immediate post-increment case for st3lane on <1 x i64>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by 3 i64
		; elements. The expected assembly contains an st3.d with a writeback
		; immediate of #24. %ptr is not used in the body.
		define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_st3lane:
		;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
		call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 3
		ret i64* %tmp
		}

		; Register post-increment case for st3lane on <1 x i64>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc i64
		; elements. The expected assembly contains an st3.d whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_st3lane:
		;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)


		; Immediate post-increment case for st3lane on <4 x float>: lane 0 of %B,
		; %C and %D is stored at %A, and the function returns %A advanced by 3
		; floats. The expected assembly contains an st3.s with a writeback
		; immediate of #12. %ptr is not used in the body.
		define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_st3lane:
		;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
		call void @llvm.arm64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 3
		ret float* %tmp
		}

		; Register post-increment case for st3lane on <4 x float>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc
		; floats. The expected assembly contains an st3.s whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_st3lane:
		;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*)


		; Immediate post-increment case for st3lane on <2 x float>: lane 0 of %B,
		; %C and %D is stored at %A, and the function returns %A advanced by 3
		; floats. The expected assembly contains an st3.s with a writeback
		; immediate of #12. %ptr is not used in the body.
		define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_st3lane:
		;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
		call void @llvm.arm64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 3
		ret float* %tmp
		}

		; Register post-increment case for st3lane on <2 x float>: lane 0 of %B, %C
		; and %D is stored at %A, and the function returns %A advanced by %inc
		; floats. The expected assembly contains an st3.s whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_st3lane:
		;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*)


		; Immediate post-increment case for st3lane on <2 x double>: lane 0 of %B,
		; %C and %D is stored at %A, and the function returns %A advanced by 3
		; doubles. The expected assembly contains an st3.d with a writeback
		; immediate of #24. %ptr is not used in the body.
		define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_st3lane:
		;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
		call void @llvm.arm64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 3
		ret double* %tmp
		}

		; Register post-increment case for st3lane on <2 x double>: lane 0 of %B,
		; %C and %D is stored at %A, and the function returns %A advanced by %inc
		; doubles. The expected assembly contains an st3.d whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_st3lane:
		;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*)


		; Immediate post-increment case for st3lane on <1 x double>: lane 0 of %B,
		; %C and %D is stored at %A, and the function returns %A advanced by 3
		; doubles. The expected assembly contains an st3.d with a writeback
		; immediate of #24. %ptr is not used in the body.
		define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_st3lane:
		;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
		call void @llvm.arm64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 3
		ret double* %tmp
		}

		; Register post-increment case for st3lane on <1 x double>: lane 0 of %B,
		; %C and %D is stored at %A, and the function returns %A advanced by %inc
		; doubles. The expected assembly contains an st3.d whose writeback offset
		; is an x register (any register number). %ptr is not used in the body.
		define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_st3lane:
		;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		declare void @llvm.arm64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*)


		; Immediate post-increment case for st4lane on <16 x i8>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by 4 i8
		; elements. The expected assembly contains an st4.b with a writeback
		; immediate of #4. %ptr is not used in the body.
		define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
		;CHECK-LABEL: test_v16i8_post_imm_st4lane:
		;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
		call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 4
		ret i8* %tmp
		}

		; Register post-increment case for st4lane on <16 x i8>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by %inc
		; i8 elements. The expected assembly contains an st4.b whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v16i8_post_reg_st4lane:
		;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)


		; Immediate post-increment case for st4lane on <8 x i8>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by 4 i8
		; elements. The expected assembly contains an st4.b with a writeback
		; immediate of #4. %ptr is not used in the body.
		define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
		;CHECK-LABEL: test_v8i8_post_imm_st4lane:
		;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
		call void @llvm.arm64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i32 4
		ret i8* %tmp
		}

		; Register post-increment case for st4lane on <8 x i8>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by %inc
		; i8 elements. The expected assembly contains an st4.b whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i8_post_reg_st4lane:
		;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
		%tmp = getelementptr i8* %A, i64 %inc
		ret i8* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)


		; Immediate post-increment case for st4lane on <8 x i16>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by 4 i16
		; elements. The expected assembly contains an st4.h with a writeback
		; immediate of #8. %ptr is not used in the body.
		define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
		;CHECK-LABEL: test_v8i16_post_imm_st4lane:
		;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
		call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 4
		ret i16* %tmp
		}

		; Register post-increment case for st4lane on <8 x i16>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by %inc
		; i16 elements. The expected assembly contains an st4.h whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v8i16_post_reg_st4lane:
		;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)


		; Immediate post-increment case for st4lane on <4 x i16>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by 4 i16
		; elements. The expected assembly contains an st4.h with a writeback
		; immediate of #8. %ptr is not used in the body.
		define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
		;CHECK-LABEL: test_v4i16_post_imm_st4lane:
		;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
		call void @llvm.arm64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i32 4
		ret i16* %tmp
		}

		; Register post-increment case for st4lane on <4 x i16>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by %inc
		; i16 elements. The expected assembly contains an st4.h whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i16_post_reg_st4lane:
		;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
		%tmp = getelementptr i16* %A, i64 %inc
		ret i16* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)


		; Immediate post-increment case for st4lane on <4 x i32>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by 4 i32
		; elements. The expected assembly contains an st4.s with a writeback
		; immediate of #16. %ptr is not used in the body.
		define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
		;CHECK-LABEL: test_v4i32_post_imm_st4lane:
		;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
		call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 4
		ret i32* %tmp
		}

		; Register post-increment case for st4lane on <4 x i32>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by %inc
		; i32 elements. The expected assembly contains an st4.s whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4i32_post_reg_st4lane:
		;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)


		; Immediate post-increment case for st4lane on <2 x i32>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by 4 i32
		; elements. The expected assembly contains an st4.s with a writeback
		; immediate of #16. %ptr is not used in the body.
		define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
		;CHECK-LABEL: test_v2i32_post_imm_st4lane:
		;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
		call void @llvm.arm64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i32 4
		ret i32* %tmp
		}

		; Register post-increment case for st4lane on <2 x i32>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by %inc
		; i32 elements. The expected assembly contains an st4.s whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i32_post_reg_st4lane:
		;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
		%tmp = getelementptr i32* %A, i64 %inc
		ret i32* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)


		; Immediate post-increment case for st4lane on <2 x i64>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by 4 i64
		; elements. The expected assembly contains an st4.d with a writeback
		; immediate of #32. %ptr is not used in the body.
		define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
		;CHECK-LABEL: test_v2i64_post_imm_st4lane:
		;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
		call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 4
		ret i64* %tmp
		}

		; Register post-increment case for st4lane on <2 x i64>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by %inc
		; i64 elements. The expected assembly contains an st4.d whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2i64_post_reg_st4lane:
		;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)


		; Immediate post-increment case for st4lane on <1 x i64>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by 4 i64
		; elements. The expected assembly contains an st4.d with a writeback
		; immediate of #32. %ptr is not used in the body.
		define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
		;CHECK-LABEL: test_v1i64_post_imm_st4lane:
		;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
		call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 4
		ret i64* %tmp
		}

		; Register post-increment case for st4lane on <1 x i64>: lane 0 of %B, %C,
		; %D and %E is stored at %A, and the function returns %A advanced by %inc
		; i64 elements. The expected assembly contains an st4.d whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1i64_post_reg_st4lane:
		;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
		%tmp = getelementptr i64* %A, i64 %inc
		ret i64* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)


		; Immediate post-increment case for st4lane on <4 x float>: lane 0 of %B,
		; %C, %D and %E is stored at %A, and the function returns %A advanced by 4
		; floats. The expected assembly contains an st4.s with a writeback
		; immediate of #16. %ptr is not used in the body.
		define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
		;CHECK-LABEL: test_v4f32_post_imm_st4lane:
		;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
		call void @llvm.arm64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 4
		ret float* %tmp
		}

		; Register post-increment case for st4lane on <4 x float>: lane 0 of %B,
		; %C, %D and %E is stored at %A, and the function returns %A advanced by
		; %inc floats. The expected assembly contains an st4.s whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v4f32_post_reg_st4lane:
		;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*)


		; Immediate post-increment case for st4lane on <2 x float>: lane 0 of %B,
		; %C, %D and %E is stored at %A, and the function returns %A advanced by 4
		; floats. The expected assembly contains an st4.s with a writeback
		; immediate of #16. %ptr is not used in the body.
		define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
		;CHECK-LABEL: test_v2f32_post_imm_st4lane:
		;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
		call void @llvm.arm64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
		%tmp = getelementptr float* %A, i32 4
		ret float* %tmp
		}

		; Register post-increment case for st4lane on <2 x float>: lane 0 of %B,
		; %C, %D and %E is stored at %A, and the function returns %A advanced by
		; %inc floats. The expected assembly contains an st4.s whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f32_post_reg_st4lane:
		;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
		%tmp = getelementptr float* %A, i64 %inc
		ret float* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*)


		; Immediate post-increment case for st4lane on <2 x double>: lane 0 of %B,
		; %C, %D and %E is stored at %A, and the function returns %A advanced by 4
		; doubles. The expected assembly contains an st4.d with a writeback
		; immediate of #32. %ptr is not used in the body.
		define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
		;CHECK-LABEL: test_v2f64_post_imm_st4lane:
		;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
		call void @llvm.arm64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 4
		ret double* %tmp
		}

		; Register post-increment case for st4lane on <2 x double>: lane 0 of %B,
		; %C, %D and %E is stored at %A, and the function returns %A advanced by
		; %inc doubles. The expected assembly contains an st4.d whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v2f64_post_reg_st4lane:
		;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*)


		; Immediate post-increment case for st4lane on <1 x double>: lane 0 of %B,
		; %C, %D and %E is stored at %A, and the function returns %A advanced by 4
		; doubles. The expected assembly contains an st4.d with a writeback
		; immediate of #32. %ptr is not used in the body.
		define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
		;CHECK-LABEL: test_v1f64_post_imm_st4lane:
		;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
		call void @llvm.arm64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 4
		ret double* %tmp
		}

		; Register post-increment case for st4lane on <1 x double>: lane 0 of %B,
		; %C, %D and %E is stored at %A, and the function returns %A advanced by
		; %inc doubles. The expected assembly contains an st4.d whose writeback
		; offset is an x register (any register number). %ptr is not used in the
		; body.
		define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
		;CHECK-LABEL: test_v1f64_post_reg_st4lane:
		;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
		call void @llvm.arm64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
		%tmp = getelementptr double* %A, i64 %inc
		ret double* %tmp
		}

		declare void @llvm.arm64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*)
		No newline at end of file
Context not available.