This is an archive of the discontinued LLVM Phabricator instance.

Differential D20250

[ARM/AArch64] Match additional patterns to ldN instructions
ClosedPublic

Authored by mssimpso on May 13 2016, 11:12 AM.

Download Raw Diff

Details

Reviewers

rengolin
t.p.northover
jmolloy

Commits

rG476c0afc014b: [ARM, AArch64] Match additional patterns to ldN instructions
rL270142: [ARM, AArch64] Match additional patterns to ldN instructions

Summary

When matching an interleaved load to an ldN pattern, the interleaved access pass checks that all users of the load are shuffles. If the load is used by an instruction other than a shuffle, the pass gives up and an ldN is not generated. This patch considers users of the load that are extractelement instructions. It attempts to modify the extracts to use one of the available shuffles instead of the load. After the transformation, the load is only used by shuffles and will then be matched with an ldN pattern.

Diff Detail

Repository: rL LLVM

Event Timeline

mssimpso updated this revision to Diff 57223. · May 13 2016, 11:12 AM

mssimpso retitled this revision to [ARM/AArch64] Match additional patterns to ldN instructions.

mssimpso updated this object.

mssimpso added reviewers: jmolloy, rengolin.

mssimpso added subscribers: mcrosier, llvm-commits.

Herald added subscribers: rengolin, aemerson. · View Herald Transcript · May 13 2016, 11:12 AM

I think the testing should probably be overhauled. A couple of CodeGen ones are OK, but this is an IR-level pass that should be tested by opt really.

The coverage is also pretty weak: it's basically just one test where the optimization applies. What if the shuffle doesn't dominate the extract? Or there's no suitable shuffle? What if there's more than one extract? What if the extract index is undef or variable?

Other than that, it looks reasonable to me. Couple of minor comments.

Tim.

lib/CodeGen/InterleavedAccessPass.cpp
249 ↗	(On Diff #57223)	Perhaps "tryReplaceExtracts" since the function actually makes changes.
322–325 ↗	(On Diff #57223)	This should probably use IRBuilder (to get the debug location transferred if nothing else).

Addressed Tim's comments.

Tim,

Thanks very much for the feedback! I've addressed your comments and added the IR-level tests you suggested. For the pass to be visible in opt, I had to change the way it was being initialized. Please let me know if my changes look correct. I can commit the initialization changes in a separate patch before this one.

Matt

Thanks Matthew. I think this looks good now.

Tim.

This revision is now accepted and ready to land. · May 19 2016, 9:35 AM

Thanks, Tim!

mssimpso mentioned this in rL270101: [ARM, AArch64] Properly initialize InterleavedAccessPass. · May 19 2016, 1:14 PM

Closed by commit rL270142: [ARM, AArch64] Match additional patterns to ldN instructions (authored by mssimpso). · Explain Why · May 19 2016, 2:45 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

trunk/

lib/

CodeGen/

InterleavedAccessPass.cpp

112 lines

test/

CodeGen/

AArch64/

aarch64-interleaved-accesses-extract-user.ll

86 lines

aarch64-interleaved-accesses.ll

12 lines

ARM/

arm-interleaved-accesses-extract-user.ll

86 lines

arm-interleaved-accesses.ll

12 lines

Diff 57860

llvm/trunk/lib/CodeGen/InterleavedAccessPass.cpp

Show All 34 Lines
// store <12 x i32> %i.vec, <12 x i32>* %ptr		// store <12 x i32> %i.vec, <12 x i32>* %ptr
//		//
// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3		// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
// intrinsic in ARM backend.		// intrinsic in ARM backend.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"		#include "llvm/CodeGen/Passes.h"
		#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"		#include "llvm/IR/InstIterator.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"		#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"		#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"		#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;		using namespace llvm;
Show All 9 Lines

namespace {		namespace {

class InterleavedAccess : public FunctionPass {		class InterleavedAccess : public FunctionPass {

public:		public:
static char ID;		static char ID;
InterleavedAccess(const TargetMachine *TM = nullptr)		InterleavedAccess(const TargetMachine *TM = nullptr)
: FunctionPass(ID), TM(TM), TLI(nullptr) {		: FunctionPass(ID), DT(nullptr), TM(TM), TLI(nullptr) {
initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());		initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
}		}

const char *getPassName() const override { return "Interleaved Access Pass"; }		const char *getPassName() const override { return "Interleaved Access Pass"; }

bool runOnFunction(Function &F) override;		bool runOnFunction(Function &F) override;

		void getAnalysisUsage(AnalysisUsage &AU) const override {
		AU.addRequired<DominatorTreeWrapperPass>();
		AU.addPreserved<DominatorTreeWrapperPass>();
		}

private:		private:
		DominatorTree *DT;
const TargetMachine *TM;		const TargetMachine *TM;
const TargetLowering *TLI;		const TargetLowering *TLI;

/// \brief Transform an interleaved load into target specific intrinsics.		/// \brief Transform an interleaved load into target specific intrinsics.
bool lowerInterleavedLoad(LoadInst *LI,		bool lowerInterleavedLoad(LoadInst *LI,
SmallVector<Instruction *, 32> &DeadInsts);		SmallVector<Instruction *, 32> &DeadInsts);

/// \brief Transform an interleaved store into target specific intrinsics.		/// \brief Transform an interleaved store into target specific intrinsics.
bool lowerInterleavedStore(StoreInst *SI,		bool lowerInterleavedStore(StoreInst *SI,
SmallVector<Instruction *, 32> &DeadInsts);		SmallVector<Instruction *, 32> &DeadInsts);

		/// \brief Returns true if the uses of an interleaved load by the
		/// extractelement instructions in \p Extracts can be replaced by uses of the
		/// shufflevector instructions in \p Shuffles instead. If so, the necessary
		/// replacements are also performed.
		bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
		ArrayRef<ShuffleVectorInst *> Shuffles);
};		};
} // end anonymous namespace.		} // end anonymous namespace.

char InterleavedAccess::ID = 0;		char InterleavedAccess::ID = 0;
INITIALIZE_TM_PASS(InterleavedAccess, "interleaved-access",		INITIALIZE_TM_PASS_BEGIN(
"Lower interleaved memory accesses to target specific intrinsics",		InterleavedAccess, "interleaved-access",
false, false)		"Lower interleaved memory accesses to target specific intrinsics", false,
		false)
		INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
		INITIALIZE_TM_PASS_END(
		InterleavedAccess, "interleaved-access",
		"Lower interleaved memory accesses to target specific intrinsics", false,
		false)

FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {		FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
return new InterleavedAccess(TM);		return new InterleavedAccess(TM);
}		}

/// \brief Check if the mask is a DE-interleave mask of the given factor		/// \brief Check if the mask is a DE-interleave mask of the given factor
/// \p Factor like:		/// \p Factor like:
/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>		/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines
}		}

bool InterleavedAccess::lowerInterleavedLoad(		bool InterleavedAccess::lowerInterleavedLoad(
LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {		LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
if (!LI->isSimple())		if (!LI->isSimple())
return false;		return false;

SmallVector<ShuffleVectorInst *, 4> Shuffles;		SmallVector<ShuffleVectorInst *, 4> Shuffles;
		SmallVector<ExtractElementInst *, 4> Extracts;

// Check if all users of this load are shufflevectors.		// Check if all users of this load are shufflevectors. If we encounter any
		// users that are extractelement instructions, we save them to later check if
		// they can be modified to extract from one of the shufflevectors instead of
		// the load.
for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {		for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
		auto *Extract = dyn_cast<ExtractElementInst>(*UI);
		if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
		Extracts.push_back(Extract);
		continue;
		}
ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);		ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))		if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
return false;		return false;

Shuffles.push_back(SVI);		Shuffles.push_back(SVI);
}		}

if (Shuffles.empty())		if (Shuffles.empty())
Show All 19 Lines	for (unsigned i = 1; i < Shuffles.size(); i++) {

if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor,		if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor,
Index))		Index))
return false;		return false;

Indices.push_back(Index);		Indices.push_back(Index);
}		}

		// Try and modify users of the load that are extractelement instructions to
		// use the shufflevector instructions instead of the load.
		if (!tryReplaceExtracts(Extracts, Shuffles))
		return false;

DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");		DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");

// Try to create target specific intrinsics to replace the load and shuffles.		// Try to create target specific intrinsics to replace the load and shuffles.
if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))		if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
return false;		return false;

for (auto SVI : Shuffles)		for (auto SVI : Shuffles)
DeadInsts.push_back(SVI);		DeadInsts.push_back(SVI);

DeadInsts.push_back(LI);		DeadInsts.push_back(LI);
return true;		return true;
}		}

		bool InterleavedAccess::tryReplaceExtracts(
		ArrayRef<ExtractElementInst *> Extracts,
		ArrayRef<ShuffleVectorInst *> Shuffles) {

		// If there aren't any extractelement instructions to modify, there's nothing
		// to do.
		if (Extracts.empty())
		return true;

		// Maps extractelement instructions to vector-index pairs. The extractelement
		// instructions will be modified to use the new vector and index operands.
		DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;

		for (auto *Extract : Extracts) {

		// The vector index that is extracted.
		auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
		auto Index = IndexOperand->getSExtValue();

		// Look for a suitable shufflevector instruction. The goal is to modify the
		// extractelement instruction (which uses an interleaved load) to use one
		// of the shufflevector instructions instead of the load.
		for (auto *Shuffle : Shuffles) {

		// If the shufflevector instruction doesn't dominate the extract, we
		// can't create a use of it.
		if (!DT->dominates(Shuffle, Extract))
		continue;

		// Inspect the indices of the shufflevector instruction. If the shuffle
		// selects the same index that is extracted, we can modify the
		// extractelement instruction.
		SmallVector<int, 4> Indices;
		Shuffle->getShuffleMask(Indices);
		for (unsigned I = 0; I < Indices.size(); ++I)
		if (Indices[I] == Index) {
		assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
		"Vector operations do not match");
		ReplacementMap[Extract] = std::make_pair(Shuffle, I);
		break;
		}

		// If we found a suitable shufflevector instruction, stop looking.
		if (ReplacementMap.count(Extract))
		break;
		}

		// If we did not find a suitable shufflevector instruction, the
		// extractelement instruction cannot be modified, so we must give up.
		if (!ReplacementMap.count(Extract))
		return false;
		}

		// Finally, perform the replacements.
		IRBuilder<> Builder(Extracts[0]->getContext());
		for (auto &Replacement : ReplacementMap) {
		auto *Extract = Replacement.first;
		auto *Vector = Replacement.second.first;
		auto Index = Replacement.second.second;
		Builder.SetInsertPoint(Extract);
		Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
		Extract->eraseFromParent();
		}

		return true;
		}

bool InterleavedAccess::lowerInterleavedStore(		bool InterleavedAccess::lowerInterleavedStore(
StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {		StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
if (!SI->isSimple())		if (!SI->isSimple())
return false;		return false;

ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());		ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
if (!SVI || !SVI->hasOneUse())		if (!SVI || !SVI->hasOneUse())
return false;		return false;
Show All 16 Lines
}		}

bool InterleavedAccess::runOnFunction(Function &F) {		bool InterleavedAccess::runOnFunction(Function &F) {
if (!TM || !LowerInterleavedAccesses)		if (!TM || !LowerInterleavedAccesses)
return false;		return false;

DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");		DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");

		DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = TM->getSubtargetImpl(F)->getTargetLowering();		TLI = TM->getSubtargetImpl(F)->getTargetLowering();
MaxFactor = TLI->getMaxSupportedInterleaveFactor();		MaxFactor = TLI->getMaxSupportedInterleaveFactor();

// Holds dead instructions that will be erased later.		// Holds dead instructions that will be erased later.
SmallVector<Instruction *, 32> DeadInsts;		SmallVector<Instruction *, 32> DeadInsts;
bool Changed = false;		bool Changed = false;

for (auto &I : instructions(F)) {		for (auto &I : instructions(F)) {
Show All 12 Lines

llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses-extract-user.ll

				; RUN: opt < %s -mtriple=aarch64 -interleaved-access -S | FileCheck %s

				; CHECK-LABEL: @extract_user_basic(
				; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
				; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
				; CHECK: extractelement <4 x i32> %[[R]], i64 1
				define void @extract_user_basic(<8 x i32>* %A, i1 %C) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				br i1 %C, label %if.then, label %if.merge

				if.then:
				%E = extractelement <8 x i32> %L, i32 2
				br label %if.merge

				if.merge:
				ret void
				}

				; CHECK-LABEL: @extract_user_multi(
				; CHECK: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
				; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %ldN, 0
				; CHECK: extractelement <4 x i32> %[[R]], i64 0
				; CHECK: extractelement <4 x i32> %[[R]], i64 1
				define void @extract_user_multi(<8 x i32>* %A, i1 %C) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				br i1 %C, label %if.then, label %if.merge

				if.then:
				%E1 = extractelement <8 x i32> %L, i32 0
				br label %if.merge

				if.merge:
				%E2 = extractelement <8 x i32> %L, i32 2
				ret void
				}

				; CHECK-LABEL: @extract_user_multi_no_dom(
				; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
				define void @extract_user_multi_no_dom(<8 x i32>* %A, i1 %C) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%E1 = extractelement <8 x i32> %L, i32 0
				br i1 %C, label %if.then, label %if.merge

				if.then:
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%E2 = extractelement <8 x i32> %L, i32 2
				br label %if.merge

				if.merge:
				ret void
				}

				; CHECK-LABEL: @extract_user_wrong_const_index(
				; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
				define void @extract_user_wrong_const_index(<8 x i32>* %A) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%E = extractelement <8 x i32> %L, i32 1
				ret void
				}

				; CHECK-LABEL: @extract_user_undef_index(
				; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
				define void @extract_user_undef_index(<8 x i32>* %A) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%E = extractelement <8 x i32> %L, i32 undef
				ret void
				}

				; CHECK-LABEL: @extract_user_var_index(
				; CHECK-NOT: %ldN = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32
				define void @extract_user_var_index(<8 x i32>* %A, i32 %I) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%E = extractelement <8 x i32> %L, i32 %I
				ret void
				}

llvm/trunk/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll

	Show First 20 Lines • Show All 262 Lines • ▼ Show 20 Lines
	; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32			; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32
	; NONEON-NEXT: str x[[RES]], [x0]			; NONEON-NEXT: str x[[RES]], [x0]
	; NONEON-NEXT: ret			; NONEON-NEXT: ret
	define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {			define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
	%tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>			%tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
	store <3 x float> %tmp1, <3 x float>* %p, align 16			store <3 x float> %tmp1, <3 x float>* %p, align 16
	ret void			ret void
	}			}

				; NEON-LABEL: load_factor2_with_extract_user:
				; NEON: ld2 { v0.4s, v1.4s }, [x0]
				; NEON: mov w0, v0.s[1]
				; NONEON-LABEL: load_factor2_with_extract_user:
				; NONEON-NOT: ld2
				define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
				%1 = load <8 x i32>, <8 x i32>* %a, align 8
				%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%3 = extractelement <8 x i32> %1, i32 2
				ret i32 %3
				}

llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses-extract-user.ll

				; RUN: opt < %s -mtriple=arm-eabi -mattr=+neon -interleaved-access -S | FileCheck %s

				; CHECK-LABEL: @extract_user_basic(
				; CHECK: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
				; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %vldN, 0
				; CHECK: extractelement <4 x i32> %[[R]], i64 1
				define void @extract_user_basic(<8 x i32>* %A, i1 %C) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				br i1 %C, label %if.then, label %if.merge

				if.then:
				%E = extractelement <8 x i32> %L, i32 2
				br label %if.merge

				if.merge:
				ret void
				}

				; CHECK-LABEL: @extract_user_multi(
				; CHECK: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
				; CHECK: %[[R:.+]] = extractvalue { <4 x i32>, <4 x i32> } %vldN, 0
				; CHECK: extractelement <4 x i32> %[[R]], i64 0
				; CHECK: extractelement <4 x i32> %[[R]], i64 1
				define void @extract_user_multi(<8 x i32>* %A, i1 %C) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				br i1 %C, label %if.then, label %if.merge

				if.then:
				%E1 = extractelement <8 x i32> %L, i32 0
				br label %if.merge

				if.merge:
				%E2 = extractelement <8 x i32> %L, i32 2
				ret void
				}

				; CHECK-LABEL: @extract_user_multi_no_dom(
				; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
				define void @extract_user_multi_no_dom(<8 x i32>* %A, i1 %C) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%E1 = extractelement <8 x i32> %L, i32 0
				br i1 %C, label %if.then, label %if.merge

				if.then:
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%E2 = extractelement <8 x i32> %L, i32 2
				br label %if.merge

				if.merge:
				ret void
				}

				; CHECK-LABEL: @extract_user_wrong_const_index(
				; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
				define void @extract_user_wrong_const_index(<8 x i32>* %A) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%E = extractelement <8 x i32> %L, i32 1
				ret void
				}

				; CHECK-LABEL: @extract_user_undef_index(
				; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
				define void @extract_user_undef_index(<8 x i32>* %A) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%E = extractelement <8 x i32> %L, i32 undef
				ret void
				}

				; CHECK-LABEL: @extract_user_var_index(
				; CHECK-NOT: %vldN = call { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2.v4i32.p0i8
				define void @extract_user_var_index(<8 x i32>* %A, i32 %I) {
				entry:
				%L = load <8 x i32>, <8 x i32>* %A, align 8
				%S = shufflevector <8 x i32> %L, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%E = extractelement <8 x i32> %L, i32 %I
				ret void
				}

llvm/trunk/test/CodeGen/ARM/arm-interleaved-accesses.ll

	Show First 20 Lines • Show All 298 Lines • ▼ Show 20 Lines
	; NONEON: BB#0:			; NONEON: BB#0:
	; NONEON-NEXT: stm r0, {r1, r3}			; NONEON-NEXT: stm r0, {r1, r3}
	; NONEON-NEXT: mov pc, lr			; NONEON-NEXT: mov pc, lr
	define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {			define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
	%tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>			%tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
	store <3 x float> %tmp1, <3 x float>* %p, align 16			store <3 x float> %tmp1, <3 x float>* %p, align 16
	ret void			ret void
	}			}

				; NEON-LABEL: load_factor2_with_extract_user:
				; NEON: vld2.32 {d16, d17, d18, d19}, [r0:64]
				; NEON: vmov.32 r0, d16[1]
				; NONEON-LABEL: load_factor2_with_extract_user:
				; NONEON-NOT: vld2
				define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
				%1 = load <8 x i32>, <8 x i32>* %a, align 8
				%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
				%3 = extractelement <8 x i32> %1, i32 2
				ret i32 %3
				}