Diff 257258

llvm/lib/Target/AArch64/AArch64.h

	Show First 20 Lines • Show All 46 Lines • ▼ Show 20 Lines
	FunctionPass *createAArch64A53Fix835769();			FunctionPass *createAArch64A53Fix835769();
	FunctionPass *createFalkorHWPFFixPass();			FunctionPass *createFalkorHWPFFixPass();
	FunctionPass *createFalkorMarkStridedAccessesPass();			FunctionPass *createFalkorMarkStridedAccessesPass();
	FunctionPass *createAArch64BranchTargetsPass();			FunctionPass *createAArch64BranchTargetsPass();

	FunctionPass *createAArch64CleanupLocalDynamicTLSPass();			FunctionPass *createAArch64CleanupLocalDynamicTLSPass();

	FunctionPass *createAArch64CollectLOHPass();			FunctionPass *createAArch64CollectLOHPass();
				ModulePass *createSVEIntrinsicOptsPass();
	InstructionSelector *			InstructionSelector *
	createAArch64InstructionSelector(const AArch64TargetMachine &,			createAArch64InstructionSelector(const AArch64TargetMachine &,
	AArch64Subtarget &, AArch64RegisterBankInfo &);			AArch64Subtarget &, AArch64RegisterBankInfo &);
	FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone);			FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone);
	FunctionPass *createAArch64StackTaggingPass(bool MergeInit);			FunctionPass *createAArch64StackTaggingPass(bool MergeInit);
	FunctionPass *createAArch64StackTaggingPreRAPass();			FunctionPass *createAArch64StackTaggingPreRAPass();

	void initializeAArch64A53Fix835769Pass(PassRegistry&);			void initializeAArch64A53Fix835769Pass(PassRegistry&);
	Show All 12 Lines
	void initializeAArch64SIMDInstrOptPass(PassRegistry&);			void initializeAArch64SIMDInstrOptPass(PassRegistry&);
	void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);			void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
	void initializeAArch64PromoteConstantPass(PassRegistry&);			void initializeAArch64PromoteConstantPass(PassRegistry&);
	void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);			void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
	void initializeAArch64StorePairSuppressPass(PassRegistry&);			void initializeAArch64StorePairSuppressPass(PassRegistry&);
	void initializeFalkorHWPFFixPass(PassRegistry&);			void initializeFalkorHWPFFixPass(PassRegistry&);
	void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);			void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
	void initializeLDTLSCleanupPass(PassRegistry&);			void initializeLDTLSCleanupPass(PassRegistry&);
				void initializeSVEIntrinsicOptsPass(PassRegistry&);
	void initializeAArch64StackTaggingPass(PassRegistry&);			void initializeAArch64StackTaggingPass(PassRegistry&);
	void initializeAArch64StackTaggingPreRAPass(PassRegistry&);			void initializeAArch64StackTaggingPreRAPass(PassRegistry&);
	} // end namespace llvm			} // end namespace llvm

	#endif			#endif

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Show First 20 Lines • Show All 140 Lines • ▼ Show 20 Lines	EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden,
cl::desc("Enable the loop data prefetch pass"),		cl::desc("Enable the loop data prefetch pass"),
cl::init(true));		cl::init(true));

static cl::opt<int> EnableGlobalISelAtO(		static cl::opt<int> EnableGlobalISelAtO(
"aarch64-enable-global-isel-at-O", cl::Hidden,		"aarch64-enable-global-isel-at-O", cl::Hidden,
cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),		cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
cl::init(0));		cl::init(0));

		static cl::opt<bool> EnableSVEIntrinsicOpts(
		"aarch64-sve-intrinsic-opts", cl::Hidden,
		cl::desc("Enable SVE intrinsic opts"),
		cl::init(true));

static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",		static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
cl::init(true), cl::Hidden);		cl::init(true), cl::Hidden);

static cl::opt<bool>		static cl::opt<bool>
EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,		EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
cl::desc("Enable the AAcrh64 branch target pass"),		cl::desc("Enable the AAcrh64 branch target pass"),
cl::init(true));		cl::init(true));

Show All 20 Lines	extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64SIMDInstrOptPass(*PR);		initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);		initializeAArch64PreLegalizerCombinerPass(*PR);
initializeAArch64PromoteConstantPass(*PR);		initializeAArch64PromoteConstantPass(*PR);
initializeAArch64RedundantCopyEliminationPass(*PR);		initializeAArch64RedundantCopyEliminationPass(*PR);
initializeAArch64StorePairSuppressPass(*PR);		initializeAArch64StorePairSuppressPass(*PR);
initializeFalkorHWPFFixPass(*PR);		initializeFalkorHWPFFixPass(*PR);
initializeFalkorMarkStridedAccessesLegacyPass(*PR);		initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);		initializeLDTLSCleanupPass(*PR);
		initializeSVEIntrinsicOptsPass(*PR);
initializeAArch64SpeculationHardeningPass(*PR);		initializeAArch64SpeculationHardeningPass(*PR);
initializeAArch64StackTaggingPass(*PR);		initializeAArch64StackTaggingPass(*PR);
initializeAArch64StackTaggingPreRAPass(*PR);		initializeAArch64StackTaggingPreRAPass(*PR);
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// AArch64 Lowering public interface.		// AArch64 Lowering public interface.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
▲ Show 20 Lines • Show All 236 Lines • ▼ Show 20 Lines	std::unique_ptr<CSEConfigBase> AArch64PassConfig::getCSEConfig() const {
return getStandardCSEConfigForOpt(TM->getOptLevel());		return getStandardCSEConfigForOpt(TM->getOptLevel());
}		}

void AArch64PassConfig::addIRPasses() {		void AArch64PassConfig::addIRPasses() {
// Always expand atomic operations, we don't deal with atomicrmw or cmpxchg		// Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
// ourselves.		// ourselves.
addPass(createAtomicExpandPass());		addPass(createAtomicExpandPass());

		// Run the SVE intrinsic optimizations (e.g. removal of redundant svbool
		// reinterprets) when optimizing aggressively.
		if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)
		efriedmaUnsubmitted Not Done Reply Inline Actions unused bool? efriedma: unused bool?
		kmclaughlinAuthorUnsubmitted Done Reply Inline Actions Removed kmclaughlin: Removed
		addPass(createSVEIntrinsicOptsPass());

// Cmpxchg instructions are often used with a subsequent comparison to		// Cmpxchg instructions are often used with a subsequent comparison to
// determine whether it succeeded. We can exploit existing control-flow in		// determine whether it succeeded. We can exploit existing control-flow in
// ldrex/strex loops to simplify this, but it needs tidying up.		// ldrex/strex loops to simplify this, but it needs tidying up.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)		if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
addPass(createCFGSimplificationPass(1, true, true, false, true));		addPass(createCFGSimplificationPass(1, true, true, false, true));

// Run LoopDataPrefetch		// Run LoopDataPrefetch
//		//
▲ Show 20 Lines • Show All 209 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/CMakeLists.txt

Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines	add_llvm_target(AArch64CodeGen
AArch64SpeculationHardening.cpp		AArch64SpeculationHardening.cpp
AArch64StackTagging.cpp		AArch64StackTagging.cpp
AArch64StackTaggingPreRA.cpp		AArch64StackTaggingPreRA.cpp
AArch64StorePairSuppress.cpp		AArch64StorePairSuppress.cpp
AArch64Subtarget.cpp		AArch64Subtarget.cpp
AArch64TargetMachine.cpp		AArch64TargetMachine.cpp
AArch64TargetObjectFile.cpp		AArch64TargetObjectFile.cpp
AArch64TargetTransformInfo.cpp		AArch64TargetTransformInfo.cpp
		SVEIntrinsicOpts.cpp
AArch64SIMDInstrOpt.cpp		AArch64SIMDInstrOpt.cpp

DEPENDS		DEPENDS
intrinsics_gen		intrinsics_gen
)		)

add_subdirectory(AsmParser)		add_subdirectory(AsmParser)
add_subdirectory(Disassembler)		add_subdirectory(Disassembler)
add_subdirectory(MCTargetDesc)		add_subdirectory(MCTargetDesc)
add_subdirectory(TargetInfo)		add_subdirectory(TargetInfo)
add_subdirectory(Utils)		add_subdirectory(Utils)

llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp

This file was added.

				//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
				//
				// The LLVM Compiler Infrastructure
				//
				// This file is distributed under the University of Illinois Open Source
				// License. See LICENSE.TXT for details.
				//
				//===----------------------------------------------------------------------===//
				//
				// Performs general IR level optimizations on SVE intrinsics.
				//
				andwarUnsubmitted Done Reply Inline Actions I don't see any documentation for the pass that's being added here (apart from the commit msg). Perhaps it's worth expanding this comment? andwar: I don't see any documentation for the pass that's being added here (apart from the commit msg).
				// The main goal of this pass is to remove unnecessary reinterpret
				// intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g.:
				//
				// %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
				// %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
				//
				// This pass also looks for ptest intrinsics & phi instructions where the
				// operands are being needlessly converted to and from svbool_t.
				//
				//===----------------------------------------------------------------------===//

				#include "Utils/AArch64BaseInfo.h"
				#include "llvm/ADT/PostOrderIterator.h"
				#include "llvm/ADT/SetVector.h"
				#include "llvm/IR/Constants.h"
				#include "llvm/IR/Dominators.h"
				#include "llvm/IR/IRBuilder.h"
				#include "llvm/IR/Instructions.h"
				#include "llvm/IR/IntrinsicInst.h"
				#include "llvm/IR/IntrinsicsAArch64.h"
				andwarUnsubmitted Done Reply Inline Actions In AArch64TargetMachine.cpp it's: static cl::opt<bool> EnableSVEIntrinsicOpts( "aarch64-sve-intrinsic-opts", cl::Hidden, cl::desc("Enable SVE intrinsic opts"), cl::init(true)); so it probably make sense to use `see-intrinsic-opts` here as well? andwar: In AArch64TargetMachine.cpp it's: ``` static cl::opt<bool> EnableSVEIntrinsicOpts( "aarch64…
				#include "llvm/IR/LLVMContext.h"
				#include "llvm/IR/PatternMatch.h"
				#include "llvm/InitializePasses.h"
				#include "llvm/Support/Debug.h"

				using namespace llvm;
				using namespace llvm::PatternMatch;

				#define DEBUG_TYPE "sve-intrinsic-opts"

				// Forward declaration of the pass initializer. The SVEIntrinsicOpts
				// constructor below calls it with the global PassRegistry.
				namespace llvm {
				void initializeSVEIntrinsicOptsPass(PassRegistry &);
				}

				namespace {
				/// Module pass that performs IR-level clean-ups on SVE predicate intrinsics:
				/// it removes convert.to/from.svbool round trips and rewrites ptest calls
				/// whose operands were needlessly widened to svbool.
				struct SVEIntrinsicOpts : public ModulePass {
				  static char ID; // Pass identification, replacement for typeid
				  SVEIntrinsicOpts() : ModulePass(ID) {
				    initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry());
				  }

				  /// Entry point: collects the functions that use the relevant SVE
				  /// intrinsics and optimizes them. Returns true if the module changed.
				  bool runOnModule(Module &M) override;
				  void getAnalysisUsage(AnalysisUsage &AU) const override;

				private:
				  // Both helpers return V as an IntrinsicInst when it is the matching
				  // reinterpret intrinsic, and nullptr otherwise.
				  static IntrinsicInst *isReinterpretFromSVBool(Value *V);
				  static IntrinsicInst *isReinterpretToSVBool(Value *V);

				  /// Dispatches a single instruction to the matching optimization.
				  static bool optimizeIntrinsic(Instruction *I);

				  /// Runs optimizeIntrinsic over every instruction of each listed function.
				  bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);

				  static bool optimizeConvertFromSVBool(IntrinsicInst *I);
				  static bool optimizePTest(IntrinsicInst *I);

				  static bool processPhiNode(IntrinsicInst *I);
				};
				} // end anonymous namespace

				/// Declares what this pass needs and what it keeps intact: the CFG is
				/// preserved and a DominatorTreeWrapperPass result is required.
				void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {
				  AU.setPreservesCFG();
				  AU.addRequired<DominatorTreeWrapperPass>();
				}

				// Storage for the pass identifier declared in SVEIntrinsicOpts.
				char SVEIntrinsicOpts::ID = 0;
				// Description string handed to the registration macros below.
				static const char *name = "SVE intrinsics optimizations";
				// Register the pass under the DEBUG_TYPE string ("sve-intrinsic-opts") and
				// record its dependency on DominatorTreeWrapperPass.
				// NOTE(review): the two `false` arguments are presumably the macros'
				// "CFG only" and "is analysis" flags - confirm against PassSupport.h.
				INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
				INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
				INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)

				namespace llvm {
				/// Factory for the SVE intrinsic optimization pass; returns a newly
				/// allocated SVEIntrinsicOpts object.
				ModulePass *createSVEIntrinsicOptsPass() {
				  ModulePass *Pass = new SVEIntrinsicOpts();
				  return Pass;
				}
				} // namespace llvm

				/// Returns V (as an IntrinsicInst) if it is a cast to <n x 16 x i1> (aka
				/// svbool_t), i.e. a call to llvm.aarch64.sve.convert.to.svbool; nullptr
				/// otherwise.
				IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) {
				  auto *I = dyn_cast<IntrinsicInst>(V);
				  if (I && I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool)
				    return I;

				  return nullptr;
				}

				/// Returns V (as an IntrinsicInst) if it is a cast from <n x 16 x i1> (aka
				/// svbool_t), i.e. a call to llvm.aarch64.sve.convert.from.svbool; nullptr
				/// otherwise.
				IntrinsicInst *SVEIntrinsicOpts::isReinterpretFromSVBool(Value *V) {
				  auto *I = dyn_cast<IntrinsicInst>(V);
				  if (I && I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool)
				    return I;

				  return nullptr;
				}

				efriedmaUnsubmitted Done Reply Inline Actions Please use getArgOperand() to get the arguments of calls. efriedma: Please use getArgOperand() to get the arguments of calls.
				/// The function will remove redundant reinterprets casting in the presence
				andwarUnsubmitted Done Reply Inline Actions Given where this method is called, this is guaranteed to be always non-null. Perhaps `assert` instead? andwar: Given where this method is called, this is guaranteed to be always non-null. Perhaps `assert`…
				/// of the control flow
				bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) {

				SmallVector<Instruction *, 32> Worklist;
				auto RequiredType = X->getType();

				auto *PN = dyn_cast<PHINode>(X->getArgOperand(0));
				assert(PN && "Expected Phi Node!");

				// Don't create a new Phi unless we can remove the old one.
				andwarUnsubmitted Not Done Reply Inline Actions Isn't it guaranteed that `RequiredType == Reinterpret->getArgOperand(0)->getType()` is always true? I.e., `PN` and the incoming values have identical type. andwar: Isn't it guaranteed that `RequiredType == Reinterpret->getArgOperand(0)->getType()` is always…
				kmclaughlinAuthorUnsubmitted Done Reply Inline Actions The incoming values to `PN` will all have the same type, but this is making sure that the reinterprets are all converting from the same type (there is a test for this in sve-intrinsic-opts-reinterpret.ll called `reinterpret_reductions_1`, where the arguments to convert.to.svbool are a mix of nxv2i1 and nxv4i1) kmclaughlin: The incoming values to `PN` will all have the same type, but this is making sure that the…
				if (!PN->hasOneUse())
				return false;

				for (Value *IncValPhi : PN->incoming_values()) {
				auto *Reinterpret = isReinterpretToSVBool(IncValPhi);
				andwarUnsubmitted Done Reply Inline Actions [nit] Perhaps `Ctx` instead of `C1`? andwar: [nit] Perhaps `Ctx` instead of `C1`?
				if (!Reinterpret \|\|
				RequiredType != Reinterpret->getArgOperand(0)->getType())
				return false;
				}

				// Create the new Phi
				andwarUnsubmitted Done Reply Inline Actions `i` -> `I` andwar: `i` -> `I`
				LLVMContext &Ctx = PN->getContext();
				IRBuilder<> Builder(Ctx);
				Builder.SetInsertPoint(PN);
				PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());
				Worklist.push_back(PN);

				for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
				auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I));
				NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I));
				Worklist.push_back(Reinterpret);
				}

				// Cleanup Phi Node and reinterprets
				X->replaceAllUsesWith(NPN);
				X->eraseFromParent();

				for (auto &I : Worklist)
				if (I->use_empty())
				I->eraseFromParent();

				return true;
				}

				/// Rewrites a ptest whose two operands are both convert.to.svbool calls from
				/// the same predicate type into the same ptest intrinsic applied directly to
				/// the unconverted operands, then erases the converts that became unused.
				///
				/// \returns true if the ptest was replaced.
				bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
				  auto *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0));
				  auto *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1));

				  if (!Op1 || !Op2 ||
				      Op1->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool ||
				      Op2->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
				    return false;

				  Value *Src1 = Op1->getArgOperand(0);
				  Value *Src2 = Op2->getArgOperand(0);
				  // Both operands must have been widened from the same predicate type.
				  if (Src1->getType() != Src2->getType())
				    return false;

				  Value *Ops[] = {Src1, Src2};
				  Type *Tys[] = {Src1->getType()};
				  Module *M = I->getModule();

				  // Same ptest intrinsic, overloaded on the narrower predicate type.
				  Function *Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys);
				  CallInst *CI = CallInst::Create(Fn, Ops, I->getName(), I);

				  I->replaceAllUsesWith(CI);
				  I->eraseFromParent();
				  if (Op1->use_empty())
				    Op1->eraseFromParent();
				  // Op1 and Op2 are the same instruction when one convert feeds both ptest
				  // operands; it must not be inspected again after being erased above.
				  if (Op1 != Op2 && Op2->use_empty())
				    Op2->eraseFromParent();

				  return true;
				}

				/// Tries to remove the convert.from.svbool call \p I.
				///
				/// A PHI operand is handed to processPhiNode. Otherwise, if the operand is a
				/// convert.to.svbool whose source already has I's type, all uses of I are
				/// replaced with that source and I is erased.
				///
				/// \returns true if the IR was changed.
				bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
				  assert(isReinterpretFromSVBool(I) &&
				         "Expected a convert.from.svbool intrinsic!");

				  Value *Operand = I->getArgOperand(0);

				  // If the reinterpret instruction operand is a PHI Node
				  if (isa<PHINode>(Operand))
				    return processPhiNode(I);

				  // If we have a reinterpret intrinsic I of type A which is converting from
				  // another reinterpret Y of type B, and the source type of Y is A, then I
				  // can be replaced by Y's source. Y itself is only removed if that leaves
				  // it without users.
				  IntrinsicInst *Y = isReinterpretToSVBool(Operand);
				  if (!Y)
				    return false;

				  Value *SourceVal = Y->getArgOperand(0);
				  if (I->getType() != SourceVal->getType())
				    return false;

				  I->replaceAllUsesWith(SourceVal);
				  I->eraseFromParent();
				  if (Y->use_empty())
				    Y->eraseFromParent();

				  return true;
				}

				/// Dispatches \p I to the optimization that handles its intrinsic ID.
				///
				/// \returns true if the IR was changed; false if I is not an intrinsic call,
				/// is an intrinsic this pass does not handle, or could not be simplified.
				bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) {
				  auto *IntrI = dyn_cast<IntrinsicInst>(I);
				  if (!IntrI)
				    return false;

				  switch (IntrI->getIntrinsicID()) {
				  case Intrinsic::aarch64_sve_convert_from_svbool:
				    return optimizeConvertFromSVBool(IntrI);
				  case Intrinsic::aarch64_sve_ptest_any:
				  case Intrinsic::aarch64_sve_ptest_first:
				  case Intrinsic::aarch64_sve_ptest_last:
				    return optimizePTest(IntrI);
				  default:
				    return false;
				  }
				}

				bool SVEIntrinsicOpts::optimizeFunctions(
				SmallSetVector<Function *, 4> &Functions) {
				bool Changed = false;
				for (auto *F : Functions) {
				DominatorTree DT = &getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
				efriedmaUnsubmitted Done Reply Inline Actions Iterating over a SmallPtrSet is non-deterministic. In this context, probably SetVector is the right data structure. efriedma: Iterating over a SmallPtrSet is non-deterministic. In this context, probably SetVector is the…

				efriedmaUnsubmitted Not Done Reply Inline Actions You might want to check whether the module actually declares any of the SVE intrinsics before you iterate over the whole function. efriedma: You might want to check whether the module actually declares any of the SVE intrinsics before…
				kmclaughlinAuthorUnsubmitted Done Reply Inline Actions Thanks for the suggestion - I changed this to a module pass so that we can check if any of the SVE intrinsics we are interested in are declared first. kmclaughlin: Thanks for the suggestion - I changed this to a module pass so that we can check if any of the…
				// Traverse the DT with an rpo walk so we see defs before uses, allowing
				// simplification to be done incrementally.
				BasicBlock *Root = DT->getRoot();
				ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
				for (auto *BB : RPOT)
				for (Instruction &I : make_early_inc_range(*BB))
				andwarUnsubmitted Done Reply Inline Actions AFAIK, this iterates over BBs so `I` should be replaced with `BB`. Also, perhaps `for (auto BB : RPOT) {` instead? andwar:* AFAIK, this iterates over BBs so `I` should be replaced with `BB`. Also, perhaps `for (auto…
				Changed \|= optimizeIntrinsic(&I);
				andwarUnsubmitted Done Reply Inline Actions Could you decorate this `for` loop with a comment explaining `what` kind of data is generated here and `why`? Ta! andwar: Could you decorate this `for` loop with a comment explaining `what` kind of data is generated…
				}
				return Changed;
				}

				/// Pass entry point. Finds the functions that use the intrinsics this pass
				/// optimizes and runs the optimizations on just those functions.
				///
				/// \returns true if the module was changed.
				bool SVEIntrinsicOpts::runOnModule(Module &M) {
				  SmallSetVector<Function *, 4> Functions;

				  // Check for SVE intrinsic declarations first so that we only iterate over
				  // relevant functions. Where an appropriate declaration is found, store the
				  // function(s) where it is used so we can target these only.
				  for (auto &F : M.getFunctionList()) {
				    if (!F.isDeclaration())
				      continue;

				    switch (F.getIntrinsicID()) {
				    case Intrinsic::aarch64_sve_convert_from_svbool:
				    case Intrinsic::aarch64_sve_ptest_any:
				    case Intrinsic::aarch64_sve_ptest_first:
				    case Intrinsic::aarch64_sve_ptest_last:
				      // Only users that are instructions live inside a function; anything
				      // else is skipped instead of being dereferenced as a null pointer.
				      for (auto *U : F.users())
				        if (auto *Inst = dyn_cast<Instruction>(U))
				          Functions.insert(Inst->getFunction());
				      break;
				    default:
				      break;
				    }
				  }

				  if (Functions.empty())
				    return false;

				  return optimizeFunctions(Functions);
				}

llvm/test/CodeGen/AArch64/O3-pipeline.ll

	Show All 12 Lines
	; CHECK-NEXT: Scoped NoAlias Alias Analysis			; CHECK-NEXT: Scoped NoAlias Alias Analysis
	; CHECK-NEXT: Profile summary info			; CHECK-NEXT: Profile summary info
	; CHECK-NEXT: Create Garbage Collector Module Metadata			; CHECK-NEXT: Create Garbage Collector Module Metadata
	; CHECK-NEXT: Machine Branch Probability Analysis			; CHECK-NEXT: Machine Branch Probability Analysis
	; CHECK-NEXT: ModulePass Manager			; CHECK-NEXT: ModulePass Manager
	; CHECK-NEXT: Pre-ISel Intrinsic Lowering			; CHECK-NEXT: Pre-ISel Intrinsic Lowering
	; CHECK-NEXT: FunctionPass Manager			; CHECK-NEXT: FunctionPass Manager
	; CHECK-NEXT: Expand Atomic instructions			; CHECK-NEXT: Expand Atomic instructions
				; CHECK-NEXT: SVE intrinsics optimizations
				; CHECK-NEXT: FunctionPass Manager
				; CHECK-NEXT: Dominator Tree Construction
				; CHECK-NEXT: FunctionPass Manager
	; CHECK-NEXT: Simplify the CFG			; CHECK-NEXT: Simplify the CFG
	; CHECK-NEXT: Dominator Tree Construction			; CHECK-NEXT: Dominator Tree Construction
	; CHECK-NEXT: Natural Loop Information			; CHECK-NEXT: Natural Loop Information
	; CHECK-NEXT: Lazy Branch Probability Analysis			; CHECK-NEXT: Lazy Branch Probability Analysis
	; CHECK-NEXT: Lazy Block Frequency Analysis			; CHECK-NEXT: Lazy Block Frequency Analysis
	; CHECK-NEXT: Optimization Remark Emitter			; CHECK-NEXT: Optimization Remark Emitter
	; CHECK-NEXT: Scalar Evolution Analysis			; CHECK-NEXT: Scalar Evolution Analysis
	; CHECK-NEXT: Loop Data Prefetch			; CHECK-NEXT: Loop Data Prefetch
	▲ Show 20 Lines • Show All 171 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll

This file was added.

				; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s

				; Both ptest.any operands are convert.to.svbool calls from nxv2i1, so the
				; expected output is a ptest.any on the nxv2i1 values with no converts left.
				define i1 @ptest_any1(<vscale x 2 x i1> %a) {
				; OPT-LABEL: ptest_any1
				; OPT: %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 0)
				; OPT-NOT: convert
				; OPT-NEXT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.any.nxv2i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %a)
				; OPT-NEXT: ret i1 %[[OUT]]
				%mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 0)
				%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %mask)
				%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
				%out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
				ret i1 %out
				}

				; No transform because the ptest is using differently sized operands.
				; %mask is converted from nxv2i1 and %a from nxv4i1, so the expected output
				; keeps both converts and the nxv16i1 ptest unchanged.
				define i1 @ptest_any2(<vscale x 4 x i1> %a) {
				; OPT-LABEL: ptest_any2
				; OPT: %mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
				andwarUnsubmitted Not Done Reply Inline Actions What's `%1` and `%2`? Is it worth adding the calls that generated them in the expected output? andwar: What's `%1` and `%2`? Is it worth adding the calls that generated them in the expected output?
				kmclaughlinAuthorUnsubmitted Done Reply Inline Actions I think that would make sense. I've added `%1` and `%2` to the expected output and added more checks to the other tests here. kmclaughlin: I think that would make sense. I've added `%1` and `%2` to the expected output and added more…
				; OPT-NEXT: %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %mask)
				; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
				; OPT-NEXT: %out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
				%mask = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
				%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %mask)
				%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
				%out = call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
				ret i1 %out
				}

				; Same pattern as ptest_any1, using ptest.first with nxv4i1 operands: the
				; expected output is a ptest.first on nxv4i1 with no converts left.
				define i1 @ptest_first(<vscale x 4 x i1> %a) {
				; OPT-LABEL: ptest_first
				; OPT: %mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 0)
				; OPT-NOT: convert
				; OPT-NEXT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.first.nxv4i1(<vscale x 4 x i1> %mask, <vscale x 4 x i1> %a)
				; OPT-NEXT: ret i1 %[[OUT]]
				%mask = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 0)
				%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %mask)
				%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
				%out = call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
				ret i1 %out
				}

				; Same pattern as ptest_any1, using ptest.last with nxv8i1 operands: the
				; expected output is a ptest.last on nxv8i1 with no converts left.
				define i1 @ptest_last(<vscale x 8 x i1> %a) {
				; OPT-LABEL: ptest_last
				; OPT: %mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 0)
				; OPT-NOT: convert
				; OPT-NEXT: %[[OUT:.*]] = call i1 @llvm.aarch64.sve.ptest.last.nxv8i1(<vscale x 8 x i1> %mask, <vscale x 8 x i1> %a)
				; OPT-NEXT: ret i1 %[[OUT]]
				%mask = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 0)
				%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %mask)
				%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
				%out = call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %2)
				ret i1 %out
				}

				declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
				declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
				declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
				declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

				declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
				declare i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
				declare i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)

				declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
				declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
				declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll

This file was added.

				; RUN: opt -S -sve-intrinsic-opts -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck --check-prefix OPT %s

				; An nxv8i1 value is converted to svbool and straight back to nxv8i1; the
				; expected output returns %a directly with no converts left.
				define <vscale x 8 x i1> @reinterpret_test_h(<vscale x 8 x i1> %a) {
				; OPT-LABEL: @reinterpret_test_h(
				; OPT-NOT: convert
				; OPT: ret <vscale x 8 x i1> %a
				%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %a)
				%2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
				ret <vscale x 8 x i1> %2
				}

				; Reinterprets are not redundant because the second reinterpret zeros the
				; lanes that don't exist within its input.
				; Here the svbool input is converted from svbool first and back to svbool
				; second (the reverse of reinterpret_test_h); both calls are expected to
				; remain in the output.
				define <vscale x 16 x i1> @reinterpret_test_h_rev(<vscale x 16 x i1> %a) {
				; OPT-LABEL: @reinterpret_test_h_rev(
				; OPT: %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
				; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
				; OPT-NEXT: ret <vscale x 16 x i1> %2
				%1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %a)
				%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
				ret <vscale x 16 x i1> %2
				}

				; An nxv4i1 value is converted to svbool and straight back to nxv4i1; the
				; expected output returns %a directly with no converts left.
				define <vscale x 4 x i1> @reinterpret_test_w(<vscale x 4 x i1> %a) {
				; OPT-LABEL: @reinterpret_test_w(
				; OPT-NOT: convert
				; OPT: ret <vscale x 4 x i1> %a
				%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
				%2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
				ret <vscale x 4 x i1> %2
				}

				; Reinterprets are not redundant because the second reinterpret zeros the
				; lanes that don't exist within its input.
				; Reverse order of reinterpret_test_w (from svbool, then to svbool); both
				; calls are expected to remain in the output.
				define <vscale x 16 x i1> @reinterpret_test_w_rev(<vscale x 16 x i1> %a) {
				; OPT-LABEL: @reinterpret_test_w_rev(
				; OPT: %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
				; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
				; OPT-NEXT: ret <vscale x 16 x i1> %2
				%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %a)
				%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
				ret <vscale x 16 x i1> %2
				}

				; An nxv2i1 value is converted to svbool and straight back to nxv2i1; the
				; expected output returns %a directly with no converts left.
				define <vscale x 2 x i1> @reinterpret_test_d(<vscale x 2 x i1> %a) {
				; OPT-LABEL: @reinterpret_test_d(
				; OPT-NOT: convert
				; OPT: ret <vscale x 2 x i1> %a
				%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
				%2 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %1)
				ret <vscale x 2 x i1> %2
				}

				; Reinterprets are not redundant because the second reinterpret zeros the
				; lanes that don't exist within its input.
				; Negative test: the round trip runs in the opposite order to
				; @reinterpret_test_d (svbool -> nxv2i1 -> svbool). The check lines expect
				; both calls and the return to be left exactly as written.
				define <vscale x 16 x i1> @reinterpret_test_d_rev(<vscale x 16 x i1> %a) {
				; OPT-LABEL: @reinterpret_test_d_rev(
				; OPT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
				; OPT-NEXT: %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
				; OPT-NEXT: ret <vscale x 16 x i1> %2
				%1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %a)
				%2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %1)
				ret <vscale x 16 x i1> %2
				}

				; Three nxv2i1 values are each widened to svbool in their own predecessor
				; block, merged by a phi of <vscale x 16 x i1>, and the phi's only user
				; narrows the result back to nxv2i1. The check lines expect every convert
				; call to be removed and the phi to be rewritten to merge %a, %b and %c
				; directly as <vscale x 2 x i1>.
				; NOTE(review): unlike the earlier tests in this file, the label below
				; omits the leading '@' and trailing '(' around the function name, and the
				; bare name is also a prefix of the later reinterpret_reductions* tests -
				; confirm this looseness is intended.
				define <vscale x 2 x i1> @reinterpret_reductions(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
				; OPT-LABEL: reinterpret_reductions
				; OPT-NOT: convert
				; OPT-NOT: phi <vscale x 16 x i1>
				; OPT: phi <vscale x 2 x i1> [ %a, %br_phi_a ], [ %b, %br_phi_b ], [ %c, %br_phi_c ]
				; OPT-NOT: convert
				; OPT: ret

				entry:
				; Three-way dispatch on %cond; %br_phi_c is the default destination.
				switch i32 %cond, label %br_phi_c [
				i32 43, label %br_phi_a
				i32 45, label %br_phi_b
				]

				br_phi_a:
				%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
				br label %join

				br_phi_b:
				%b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
				br label %join

				br_phi_c:
				%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
				br label %join

				join:
				; All incoming values were widened from the same type (nxv2i1), and the
				; phi is consumed only by the narrowing call that follows.
				%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
				%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
				ret <vscale x 2 x i1> %pg1
				}

				; No transform as the reinterprets are converting from different types (nxv2i1 & nxv4i1)
				; As the incoming values to the phi must all be the same type, we cannot remove the reinterprets.
				; The check lines expect the svbool phi and the trailing from.svbool call
				; to be kept, and no <vscale x 2 x i1> phi to be created.
				define <vscale x 2 x i1> @reinterpret_reductions_1(i32 %cond, <vscale x 2 x i1> %a, <vscale x 4 x i1> %b, <vscale x 2 x i1> %c) {
				; OPT-LABEL: reinterpret_reductions_1
				; OPT: convert
				; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
				; OPT-NOT: phi <vscale x 2 x i1>
				; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
				; OPT: ret

				entry:
				; Three-way dispatch on %cond; %br_phi_c is the default destination.
				switch i32 %cond, label %br_phi_c [
				i32 43, label %br_phi_a
				i32 45, label %br_phi_b
				]

				br_phi_a:
				%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
				br label %join

				br_phi_b:
				; This edge widens from nxv4i1, whereas the other two widen from nxv2i1.
				%b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %b)
				br label %join

				br_phi_c:
				%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
				br label %join

				join:
				%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
				%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
				ret <vscale x 2 x i1> %pg1
				}

				; No transform. Similar to the test above, but here only two of the arguments need to
				; be converted to svbool.
				; The check lines expect the svbool phi (with %b as a direct incoming
				; value) and the trailing from.svbool call to be kept, and no
				; <vscale x 2 x i1> phi to be created.
				define <vscale x 2 x i1> @reinterpret_reductions_2(i32 %cond, <vscale x 2 x i1> %a, <vscale x 16 x i1> %b, <vscale x 2 x i1> %c) {
				; OPT-LABEL: reinterpret_reductions_2
				; OPT: convert
				; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
				; OPT-NOT: phi <vscale x 2 x i1>
				; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
				; OPT: ret

				entry:
				; Three-way dispatch on %cond; %br_phi_c is the default destination.
				switch i32 %cond, label %br_phi_c [
				i32 43, label %br_phi_a
				i32 45, label %br_phi_b
				]

				br_phi_a:
				%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
				br label %join

				br_phi_b:
				; %b is already <vscale x 16 x i1>, so this edge contains no convert call.
				br label %join

				br_phi_c:
				%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
				br label %join

				join:
				%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b, %br_phi_b ], [ %c1, %br_phi_c ]
				%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
				ret <vscale x 2 x i1> %pg1
				}

				; Similar to reinterpret_reductions but the reinterprets remain because the
				; original phi cannot be removed (i.e. prefer reinterprets over multiple phis).
				; The function returns the svbool phi %pg itself, so %pg has a user other
				; than the from.svbool call. The check lines expect the svbool phi and the
				; from.svbool call to be kept, and no <vscale x 2 x i1> phi to be created.
				define <vscale x 16 x i1> @reinterpret_reductions3(i32 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b, <vscale x 2 x i1> %c) {
				; OPT-LABEL: reinterpret_reductions3
				; OPT: phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
				; OPT-NOT: phi <vscale x 2 x i1>
				; OPT: tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
				; OPT-NEXT: ret <vscale x 16 x i1> %pg

				entry:
				; Three-way dispatch on %cond; %br_phi_c is the default destination.
				switch i32 %cond, label %br_phi_c [
				i32 43, label %br_phi_a
				i32 45, label %br_phi_b
				]

				br_phi_a:
				%a1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
				br label %join

				br_phi_b:
				%b1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %b)
				br label %join

				br_phi_c:
				%c1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %c)
				br label %join

				join:
				%pg = phi <vscale x 16 x i1> [ %a1, %br_phi_a ], [ %b1, %br_phi_b ], [ %c1, %br_phi_c ]
				; %pg1 has no users in this function; the check lines still expect the
				; call to be present in the output.
				%pg1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
				ret <vscale x 16 x i1> %pg
				}

				; Declarations of the SVE predicate reinterpret intrinsics called in this
				; file: one to.svbool / from.svbool pair for each of the predicate types
				; nxv8i1, nxv4i1 and nxv2i1. The svbool side is always <vscale x 16 x i1>.
				declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
				declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
				declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
				declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
				declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
				declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add a pass for SVE intrinsic optimisations
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 257258

llvm/lib/Target/AArch64/AArch64.h

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

llvm/lib/Target/AArch64/CMakeLists.txt

llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp

llvm/test/CodeGen/AArch64/O3-pipeline.ll

llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll

llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Add a pass for SVE intrinsic optimisations
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 257258

llvm/lib/Target/AArch64/AArch64.h

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

llvm/lib/Target/AArch64/CMakeLists.txt

llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp

llvm/test/CodeGen/AArch64/O3-pipeline.ll

llvm/test/CodeGen/AArch64/sve-intrinsic-opts-ptest.ll

llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll

[AArch64][SVE] Add a pass for SVE intrinsic optimisations
ClosedPublic