This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/AArch64/
-
Target/
-
AArch64/
-
AArch64.h
4/5
AArch64StackTagging.cpp
-
AArch64TargetMachine.cpp
-
test/CodeGen/AArch64/
-
CodeGen/
-
AArch64/
-
O3-pipeline.ll
-
stack-tagging-initializer-merge.ll

Differential D66167

MemTag: stack initializer merging.
ClosedPublic

Authored by eugenis on Aug 13 2019, 2:06 PM.

Download Raw Diff

Details

Reviewers

pcc
vitalybuka
ostannard

Commits

rG50affbe47fc9: MemTag: stack initializer merging.
rL369297: MemTag: stack initializer merging.

Summary

MTE provides instructions to update memory tags and data at the same
time. This change makes use of those to generate more compact code for
stack variable tagging + initialization.

We collect memory store and memset instructions following an alloca or a
lifetime.start call, and replace them with the corresponding MTE
intrinsics. Since the intrinsics work on 16-byte aligned chunks, the
stored values are combined as necessary.

Diff Detail

Repository

rG LLVM Github Monorepo

Build Status

Buildable 36969
Build 36968: arc lint + arc unit

Event Timeline

eugenis created this revision.Aug 13 2019, 2:06 PM

Herald added a project: Restricted Project. · View Herald TranscriptAug 13 2019, 2:06 PM

Herald added subscribers: hiraditya, javed.absar, srhines. · View Herald Transcript

Harbormaster completed remote builds in B36693: Diff 214915.Aug 13 2019, 2:06 PM

pcc added inline comments.Aug 15 2019, 4:28 PM

llvm/lib/Target/AArch64/AArch64StackTagging.cpp
196	This could call `emitUndef`, right? Is there an advantage in using `STZG` instead of `STG` here, and if so, should we be doing the same thing on line 181?

eugenis marked an inline comment as done.Aug 15 2019, 4:52 PM

eugenis added inline comments.

llvm/lib/Target/AArch64/AArch64StackTagging.cpp
196	No, because memset(0) does not update Out[] (see applyMemset), so we don't know if the tail is zero or undef. Line 181 is different - with Ranges.empty() we know that the entire allocation is undef. If we wanted to change this, Out[] would need to remember which bytes have been written to in a separate mask. I did not do this because STG and STZG are expected to have approximately the same overhead.

LGTM

llvm/lib/Target/AArch64/AArch64StackTagging.cpp
196	I see. I'd probably leave a comment about that here. I guess an alternative would be to eliminate this special case by also making memset(0) update Out. Probably doesn't matter much for now, though.
253	Is there test coverage for this code? Please add if not.
344	Remove braces

This revision is now accepted and ready to land.Aug 15 2019, 5:23 PM

addressed review comments

Harbormaster completed remote builds in B36969: Diff 215974.Aug 19 2019, 1:31 PM

eugenis marked an inline comment as done.Aug 19 2019, 1:32 PM

Closed by commit rL369297: MemTag: stack initializer merging. (authored by eugenis). · Explain WhyAug 19 2019, 1:46 PM

This revision was automatically updated to reflect the committed changes.

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64.h

2 lines

AArch64StackTagging.cpp

302 lines

AArch64TargetMachine.cpp

3 lines

test/

CodeGen/

AArch64/

O3-pipeline.ll

2 lines

stack-tagging-initializer-merge.ll

308 lines

Diff 215974

llvm/lib/Target/AArch64/AArch64.h

	Show First 20 Lines • Show All 50 Lines • ▼ Show 20 Lines

	FunctionPass *createAArch64CleanupLocalDynamicTLSPass();			FunctionPass *createAArch64CleanupLocalDynamicTLSPass();

	FunctionPass *createAArch64CollectLOHPass();			FunctionPass *createAArch64CollectLOHPass();
	InstructionSelector *			InstructionSelector *
	createAArch64InstructionSelector(const AArch64TargetMachine &,			createAArch64InstructionSelector(const AArch64TargetMachine &,
	AArch64Subtarget &, AArch64RegisterBankInfo &);			AArch64Subtarget &, AArch64RegisterBankInfo &);
	FunctionPass *createAArch64PreLegalizeCombiner();			FunctionPass *createAArch64PreLegalizeCombiner();
	FunctionPass *createAArch64StackTaggingPass();			FunctionPass *createAArch64StackTaggingPass(bool MergeInit);

	void initializeAArch64A53Fix835769Pass(PassRegistry&);			void initializeAArch64A53Fix835769Pass(PassRegistry&);
	void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);			void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
	void initializeAArch64AdvSIMDScalarPass(PassRegistry&);			void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
	void initializeAArch64BranchTargetsPass(PassRegistry&);			void initializeAArch64BranchTargetsPass(PassRegistry&);
	void initializeAArch64CollectLOHPass(PassRegistry&);			void initializeAArch64CollectLOHPass(PassRegistry&);
	void initializeAArch64CondBrTuningPass(PassRegistry &);			void initializeAArch64CondBrTuningPass(PassRegistry &);
	void initializeAArch64CompressJumpTablesPass(PassRegistry&);			void initializeAArch64CompressJumpTablesPass(PassRegistry&);
	Show All 18 Lines

llvm/lib/Target/AArch64/AArch64StackTagging.cpp

	Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines
	#include <cassert>			#include <cassert>
	#include <iterator>			#include <iterator>
	#include <utility>			#include <utility>

	using namespace llvm;			using namespace llvm;

	#define DEBUG_TYPE "stack-tagging"			#define DEBUG_TYPE "stack-tagging"

				static cl::opt<bool> ClMergeInit(
				"stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
				cl::desc("merge stack variable initializers with tagging when possible"));

				static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
				cl::init(40), cl::Hidden);

	static constexpr unsigned kTagGranuleSize = 16;			static constexpr unsigned kTagGranuleSize = 16;

	namespace {			namespace {

				class InitializerBuilder {
				uint64_t Size;
				const DataLayout *DL;
				Value *BasePtr;
				Function *SetTagFn;
				Function *SetTagZeroFn;
				Function *StgpFn;

				// List of initializers sorted by start offset.
				struct Range {
				uint64_t Start, End;
				Instruction *Inst;
				};
				SmallVector<Range, 4> Ranges;
				// 8-aligned offset => 8-byte initializer
				// Missing keys are zero initialized.
				std::map<uint64_t, Value *> Out;

				public:
				InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
				Function *SetTagFn, Function *SetTagZeroFn,
				Function *StgpFn)
				: Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
				SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

				bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
				auto I = std::lower_bound(
				Ranges.begin(), Ranges.end(), Start,
				[](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; });
				if (I != Ranges.end() && End > I->Start) {
				// Overlap - bail.
				return false;
				}
				Ranges.insert(I, {Start, End, Inst});
				return true;
				}

				bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
				int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
				if (!addRange(Offset, Offset + StoreSize, SI))
				return false;
				IRBuilder<> IRB(SI);
				applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
				return true;
				}

				bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
				uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
				if (!addRange(Offset, Offset + StoreSize, MSI))
				return false;
				IRBuilder<> IRB(MSI);
				applyMemSet(IRB, Offset, Offset + StoreSize,
				cast<ConstantInt>(MSI->getValue()));
				return true;
				}

				void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
				ConstantInt *V) {
				// Out[] does not distinguish between zero and undef, and we already know
				// that this memset does not overlap with any other initializer. Nothing to
				// do for memset(0).
				if (V->isZero())
				return;
				for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
				uint64_t Cst = 0x0101010101010101UL;
				int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
				if (LowBits)
				Cst = (Cst >> LowBits) << LowBits;
				int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
				if (HighBits)
				Cst = (Cst << HighBits) >> HighBits;
				ConstantInt *C =
				ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

				Value *&CurrentV = Out[Offset];
				if (!CurrentV) {
				CurrentV = C;
				} else {
				CurrentV = IRB.CreateOr(CurrentV, C);
				}
				}
				}

				// Take a 64-bit slice of the value starting at the given offset (in bytes).
				// Offset can be negative. Pad with zeroes on both sides when necessary.
				Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
				if (Offset > 0) {
				V = IRB.CreateLShr(V, Offset * 8);
				V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
				} else if (Offset < 0) {
				V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
				V = IRB.CreateShl(V, -Offset * 8);
				} else {
				V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
				}
				return V;
				}

				void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
				Value *StoredValue) {
				StoredValue = flatten(IRB, StoredValue);
				for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
				Value *V = sliceValue(IRB, StoredValue, Offset - Start);
				Value *&CurrentV = Out[Offset];
				if (!CurrentV) {
				CurrentV = V;
				} else {
				CurrentV = IRB.CreateOr(CurrentV, V);
				}
				}
				}

				void generate(IRBuilder<> &IRB) {
				LLVM_DEBUG(dbgs() << "Combined initializer\n");
				// No initializers => the entire allocation is undef.
				if (Ranges.empty()) {
				emitUndef(IRB, 0, Size);
				return;
				}

				// Look through 8-byte initializer list 16 bytes at a time;
				// If one of the two 8-byte halves is non-zero non-undef, emit STGP.
				// Otherwise, emit zeroes up to next available item.
				uint64_t LastOffset = 0;
				for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
				auto I1 = Out.find(Offset);
				auto I2 = Out.find(Offset + 8);
				if (I1 == Out.end() && I2 == Out.end())
				pccUnsubmitted Not Done Reply Inline Actions This could call `emitUndef`, right? Is there an advantage in using `STZG` instead of `STG` here, and if so, should we be doing the same thing on line 181? pcc: This could call `emitUndef`, right? Is there an advantage in using `STZG` instead of `STG` here…
				eugenisAuthorUnsubmitted Done Reply Inline Actions No, because memset(0) does not update Out[] (see applyMemset), so we don't know if the tail is zero or undef. Line 181 is different - with Ranges.empty() we know that the entire allocation is undef. If we wanted to change this, Out[] would need to remember which bytes have been written to in a separate mask. I did not do this because STG and STZG are expected to have approximately the same overhead. eugenis: No, because memset(0) does not update Out[] (see applyMemset), so we don't know if the tail is…
				pccUnsubmitted Done Reply Inline Actions I see. I'd probably leave a comment about that here. I guess an alternative would be to eliminate this special case by also making memset(0) update Out. Probably doesn't matter much for now, though. pcc: I see. I'd probably leave a comment about that here. I guess an alternative would be to…
				continue;

				if (Offset > LastOffset)
				emitZeroes(IRB, LastOffset, Offset - LastOffset);

				Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
				: I1->second;
				Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
				: I2->second;
				emitPair(IRB, Offset, Store1, Store2);
				LastOffset = Offset + 16;
				}

				// memset(0) does not update Out[], therefore the tail can be either undef
				// or zero.
				if (LastOffset < Size)
				emitZeroes(IRB, LastOffset, Size - LastOffset);

				for (const auto &R : Ranges) {
				R.Inst->eraseFromParent();
				}
				}

				void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
				LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
				<< ") zero\n");
				Value *Ptr = BasePtr;
				if (Offset)
				Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
				IRB.CreateCall(SetTagZeroFn,
				{Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
				}

				void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
				LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + Size
				<< ") undef\n");
				Value *Ptr = BasePtr;
				if (Offset)
				Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
				IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
				}

				void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
				LLVM_DEBUG(dbgs() << " [" << Offset << ", " << Offset + 16 << "):\n");
				LLVM_DEBUG(dbgs() << " " << *A << "\n " << *B << "\n");
				Value *Ptr = BasePtr;
				if (Offset)
				Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
				IRB.CreateCall(StgpFn, {Ptr, A, B});
				}

				Value *flatten(IRBuilder<> &IRB, Value *V) {
				if (V->getType()->isIntegerTy())
				return V;
				// vector of pointers -> vector of ints
				if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
				LLVMContext &Ctx = IRB.getContext();
				pccUnsubmitted Done Reply Inline Actions Is there test coverage for this code? Please add if not. pcc: Is there test coverage for this code? Please add if not.
				Type *EltTy = VecTy->getElementType();
				if (EltTy->isPointerTy()) {
				uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
				Type *NewTy = VectorType::get(IntegerType::get(Ctx, EltSize),
				VecTy->getNumElements());
				V = IRB.CreatePointerCast(V, NewTy);
				}
				}
				return IRB.CreateBitOrPointerCast(
				V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
				}
				};

	class AArch64StackTagging : public FunctionPass {			class AArch64StackTagging : public FunctionPass {
	struct AllocaInfo {			struct AllocaInfo {
	AllocaInst *AI;			AllocaInst *AI;
	SmallVector<IntrinsicInst *, 2> LifetimeStart;			SmallVector<IntrinsicInst *, 2> LifetimeStart;
	SmallVector<IntrinsicInst *, 2> LifetimeEnd;			SmallVector<IntrinsicInst *, 2> LifetimeEnd;
	SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;			SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
	int Tag; // -1 for non-tagged allocations			int Tag; // -1 for non-tagged allocations
	};			};

				bool MergeInit;

	public:			public:
	static char ID; // Pass ID, replacement for typeid			static char ID; // Pass ID, replacement for typeid

	AArch64StackTagging() : FunctionPass(ID) {			AArch64StackTagging(bool MergeInit = true)
				: FunctionPass(ID),
				MergeInit(ClMergeInit.getNumOccurrences() > 0 ? ClMergeInit
				: MergeInit) {
	initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());			initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
	}			}

	bool isInterestingAlloca(const AllocaInst &AI);			bool isInterestingAlloca(const AllocaInst &AI);
	void alignAndPadAlloca(AllocaInfo &Info);			void alignAndPadAlloca(AllocaInfo &Info);

	void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,			void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
	uint64_t Size);			uint64_t Size);
	void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);			void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

				Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
				uint64_t Size, InitializerBuilder &IB);

	Instruction *			Instruction *
	insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,			insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
	const DominatorTree *DT);			const DominatorTree *DT);
	bool runOnFunction(Function &F) override;			bool runOnFunction(Function &F) override;

	StringRef getPassName() const override { return "AArch64 Stack Tagging"; }			StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

	private:			private:
	Function *F;			Function *F;
	Function *SetTagFunc;			Function *SetTagFunc;
	const DataLayout *DL;			const DataLayout *DL;
				AAResults *AA;

	void getAnalysisUsage(AnalysisUsage &AU) const override {			void getAnalysisUsage(AnalysisUsage &AU) const override {
	AU.setPreservesCFG();			AU.setPreservesCFG();
				if (MergeInit)
				AU.addRequired<AAResultsWrapperPass>();
	}			}
	};			};

	} // end anonymous namespace			} // end anonymous namespace

	char AArch64StackTagging::ID = 0;			char AArch64StackTagging::ID = 0;

	INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",			INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
	false, false)			false, false)
	INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",			INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
	false, false)			false, false)

	FunctionPass *llvm::createAArch64StackTaggingPass() {			FunctionPass *llvm::createAArch64StackTaggingPass(bool MergeInit) {
	return new AArch64StackTagging();			return new AArch64StackTagging(MergeInit);
				}

				Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
				Value *StartPtr,
				uint64_t Size,
				InitializerBuilder &IB) {
				MemoryLocation AllocaLoc{StartPtr, Size};
				Instruction *LastInst = StartInst;
				BasicBlock::iterator BI(StartInst);

				unsigned Count = 0;
				for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
				if (!isa<DbgInfoIntrinsic>(*BI))
				++Count;

				if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
				pccUnsubmitted Done Reply Inline Actions Remove braces pcc: Remove braces
				continue;

				if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
				// If the instruction is readnone, ignore it, otherwise bail out. We
				// don't even allow readonly here because we don't want something like:
				// A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
				if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
				break;
				continue;
				}

				if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
				if (!NextStore->isSimple())
				break;

				// Check to see if this store is to a constant offset from the start ptr.
				int64_t Offset;
				if (!isPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
				*DL))
				break;

				if (!IB.addStore(Offset, NextStore, DL))
				break;
				LastInst = NextStore;
				} else {
				MemSetInst *MSI = cast<MemSetInst>(BI);

				if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
				break;

				if (!isa<ConstantInt>(MSI->getValue()))
				break;

				// Check to see if this store is to a constant offset from the start ptr.
				int64_t Offset;
				if (!isPointerOffset(StartPtr, MSI->getDest(), Offset, *DL))
				break;

				if (!IB.addMemSet(Offset, MSI))
				break;
				LastInst = MSI;
				}
				}
				return LastInst;
	}			}

	bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {			bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
	// FIXME: support dynamic allocas			// FIXME: support dynamic allocas
	bool IsInteresting =			bool IsInteresting =
	AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&			AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
	// alloca() may be called with 0 size, ignore it.			// alloca() may be called with 0 size, ignore it.
	AI.getAllocationSizeInBits(*DL).getValue() > 0 &&			AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
	// inalloca allocas are not treated as static, and we don't want			// inalloca allocas are not treated as static, and we don't want
	// dynamic alloca instrumentation for them as well.			// dynamic alloca instrumentation for them as well.
	!AI.isUsedWithInAlloca() &&			!AI.isUsedWithInAlloca() &&
	// swifterror allocas are register promoted by ISel			// swifterror allocas are register promoted by ISel
	!AI.isSwiftError();			!AI.isSwiftError();
	return IsInteresting;			return IsInteresting;
	}			}

	void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,			void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
	Value *Ptr, uint64_t Size) {			Value *Ptr, uint64_t Size) {
				auto SetTagZeroFunc =
				Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
				auto StgpFunc =
				Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);

				InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
				bool LittleEndian =
				Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
				// Current implementation of initializer merging assumes little endianness.
				if (MergeInit && !F->hasOptNone() && LittleEndian) {
				LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
				<< ", size = " << Size << "\n");
				InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
				}

	IRBuilder<> IRB(InsertBefore);			IRBuilder<> IRB(InsertBefore);
	IRB.CreateCall(SetTagFunc, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});			IB.generate(IRB);
	}			}

	void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,			void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
	uint64_t Size) {			uint64_t Size) {
	IRBuilder<> IRB(InsertBefore);			IRBuilder<> IRB(InsertBefore);
	IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),			IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
	ConstantInt::get(IRB.getInt64Ty(), Size)});			ConstantInt::get(IRB.getInt64Ty(), Size)});
	}			}
	▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines

	// FIXME: check for MTE extension			// FIXME: check for MTE extension
	bool AArch64StackTagging::runOnFunction(Function &Fn) {			bool AArch64StackTagging::runOnFunction(Function &Fn) {
	if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))			if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
	return false;			return false;

	F = &Fn;			F = &Fn;
	DL = &Fn.getParent()->getDataLayout();			DL = &Fn.getParent()->getDataLayout();
				if (MergeInit)
				AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

	MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order			MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
	SmallVector<Instruction *, 8> RetVec;			SmallVector<Instruction *, 8> RetVec;
	DenseMap<Value *, AllocaInst *> AllocaForValue;			DenseMap<Value *, AllocaInst *> AllocaForValue;
	SmallVector<Instruction *, 4> UnrecognizedLifetimes;			SmallVector<Instruction *, 4> UnrecognizedLifetimes;

	for (auto &BB : *F) {			for (auto &BB : *F) {
	for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {			for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
	▲ Show 20 Lines • Show All 130 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Show First 20 Lines • Show All 442 Lines • ▼ Show 20 Lines	if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
// Call EarlyCSE pass to find and remove subexpressions in the lowered		// Call EarlyCSE pass to find and remove subexpressions in the lowered
// result.		// result.
addPass(createEarlyCSEPass());		addPass(createEarlyCSEPass());
// Do loop invariant code motion in case part of the lowered result is		// Do loop invariant code motion in case part of the lowered result is
// invariant.		// invariant.
addPass(createLICMPass());		addPass(createLICMPass());
}		}

addPass(createAArch64StackTaggingPass());		addPass(createAArch64StackTaggingPass(/* MergeInit = */ TM->getOptLevel() !=
		CodeGenOpt::None));
}		}

// Pass Pipeline Configuration		// Pass Pipeline Configuration
bool AArch64PassConfig::addPreISel() {		bool AArch64PassConfig::addPreISel() {
// Run promote constant before global merge, so that the promoted constants		// Run promote constant before global merge, so that the promoted constants
// get a chance to be merged		// get a chance to be merged
if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)		if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
addPass(createAArch64PromoteConstantPass());		addPass(createAArch64PromoteConstantPass());
▲ Show 20 Lines • Show All 156 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/O3-pipeline.ll

	Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: Expand reduction intrinsics			; CHECK-NEXT: Expand reduction intrinsics
	; CHECK-NEXT: Dominator Tree Construction			; CHECK-NEXT: Dominator Tree Construction
	; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)			; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
	; CHECK-NEXT: Function Alias Analysis Results			; CHECK-NEXT: Function Alias Analysis Results
	; CHECK-NEXT: Memory SSA			; CHECK-NEXT: Memory SSA
	; CHECK-NEXT: Interleaved Load Combine Pass			; CHECK-NEXT: Interleaved Load Combine Pass
	; CHECK-NEXT: Dominator Tree Construction			; CHECK-NEXT: Dominator Tree Construction
	; CHECK-NEXT: Interleaved Access Pass			; CHECK-NEXT: Interleaved Access Pass
				; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
				; CHECK-NEXT: Function Alias Analysis Results
	; CHECK-NEXT: AArch64 Stack Tagging			; CHECK-NEXT: AArch64 Stack Tagging
	; CHECK-NEXT: Natural Loop Information			; CHECK-NEXT: Natural Loop Information
	; CHECK-NEXT: CodeGen Prepare			; CHECK-NEXT: CodeGen Prepare
	; CHECK-NEXT: Rewrite Symbols			; CHECK-NEXT: Rewrite Symbols
	; CHECK-NEXT: FunctionPass Manager			; CHECK-NEXT: FunctionPass Manager
	; CHECK-NEXT: Dominator Tree Construction			; CHECK-NEXT: Dominator Tree Construction
	; CHECK-NEXT: Exception handling preparation			; CHECK-NEXT: Exception handling preparation
	; CHECK-NEXT: AArch64 Promote Constant			; CHECK-NEXT: AArch64 Promote Constant
	▲ Show 20 Lines • Show All 116 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/stack-tagging-initializer-merge.ll

This file was added.

				; RUN: opt < %s -stack-tagging -S -o - | FileCheck %s

				target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
				target triple = "aarch64--linux-android"

				declare void @use(i8*)
				declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
				declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
				declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)

				define void @OneVarNoInit() sanitize_memtag {
				entry:
				%x = alloca i32, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @OneVarNoInit(
				; CHECK-DAG: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
				; CHECK-DAG: [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp.{{.*}}({ i32, [12 x i8] }* [[X]], {{.*}}, i64 0)
				; CHECK-DAG: [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
				; CHECK-DAG: [[TX8:%.*]] = bitcast i32* [[TX32]] to i8*
				; CHECK-DAG: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TX8]])
				; CHECK-DAG: call void @llvm.aarch64.settag(i8* [[TX8]], i64 16)
				; CHECK-DAG: call void @use(i8* nonnull [[TX8]])
				; CHECK-DAG: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TX8]])

				define void @OneVarInitConst() sanitize_memtag {
				entry:
				%x = alloca i32, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
				store i32 42, i32* %x, align 4
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @OneVarInitConst(
				; CHECK: [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp
				; CHECK: [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX32]] to i8*
				; CHECK-NOT: aarch64.settag
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 42, i64 0)
				; Untagging before lifetime.end:
				; CHECK: call void @llvm.aarch64.settag(
				; CHECK-NOT: aarch64.settag
				; CHECK: ret void

				define void @ArrayInitConst() sanitize_memtag {
				entry:
				%x = alloca i32, i32 16, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
				store i32 42, i32* %x, align 4
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @ArrayInitConst(
				; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 42, i64 0)
				; CHECK: [[TX8_16:%.*]] = getelementptr i8, i8* [[TX8]], i32 16
				; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_16]], i64 48)
				; CHECK: ret void

				define void @ArrayInitConst2() sanitize_memtag {
				entry:
				%x = alloca i32, i32 16, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
				store i32 42, i32* %x, align 4
				%1 = getelementptr i32, i32* %x, i32 1
				store i32 43, i32* %1, align 4
				%2 = getelementptr i32, i32* %x, i32 2
				%3 = bitcast i32* %2 to i64*
				store i64 -1, i64* %3, align 4
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @ArrayInitConst2(
				; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 184683593770, i64 -1)
				; CHECK: [[TX8_16:%.*]] = getelementptr i8, i8* [[TX8]], i32 16
				; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_16]], i64 48)
				; CHECK: ret void

				define void @ArrayInitConstSplit() sanitize_memtag {
				entry:
				%x = alloca i32, i32 16, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0)
				%1 = getelementptr i32, i32* %x, i32 1
				%2 = bitcast i32* %1 to i64*
				store i64 -1, i64* %2, align 4
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @ArrayInitConstSplit(
				; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 -4294967296, i64 4294967295)
				; CHECK: ret void

				define void @ArrayInitConstWithHoles() sanitize_memtag {
				entry:
				%x = alloca i32, i32 32, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 128, i8* nonnull %0)
				%1 = getelementptr i32, i32* %x, i32 5
				store i32 42, i32* %1, align 4
				%2 = getelementptr i32, i32* %x, i32 14
				store i32 43, i32* %2, align 4
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 128, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @ArrayInitConstWithHoles(
				; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp.
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
				; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8]], i64 16)
				; CHECK: [[TX8_16:%.*]] = getelementptr i8, i8* %0, i32 16
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8_16]], i64 180388626432, i64 0)
				; CHECK: [[TX8_32:%.*]] = getelementptr i8, i8* %0, i32 32
				; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_32]], i64 16)
				; CHECK: [[TX8_48:%.*]] = getelementptr i8, i8* %0, i32 48
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8_48]], i64 0, i64 43)
				; CHECK: [[TX8_64:%.*]] = getelementptr i8, i8* %0, i32 64
				; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8_64]], i64 64)
				; CHECK: ret void

				define void @InitNonConst(i32 %v) sanitize_memtag {
				entry:
				%x = alloca i32, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0)
				store i32 %v, i32* %x, align 4
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @InitNonConst(
				; CHECK: [[TX:%.*]] = call { i32, [12 x i8] }* @llvm.aarch64.tagp
				; CHECK: [[TX32:%.*]] = bitcast { i32, [12 x i8] }* [[TX]] to i32*
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX32]] to i8*
				; CHECK: [[V:%.*]] = zext i32 %v to i64
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 [[V]], i64 0)
				; CHECK: ret void

				; Two non-constant i32 stores (%v, %w) into adjacent elements 0 and 1 of a
				; 16-byte array. Both land in the same 8-byte word, so the FileCheck
				; directives that follow expect them to be combined with zext / shl 32 / or
				; into one i64 that becomes the first data word of the tag+store-pair call.
				define void @InitNonConst2(i32 %v, i32 %w) sanitize_memtag {
				entry:
				%x = alloca i32, i32 4, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
				; Bytes [0, 4): low half of the first word.
				store i32 %v, i32* %x, align 4
				; Bytes [4, 8): high half of the first word.
				%1 = getelementptr i32, i32* %x, i32 1
				store i32 %w, i32* %1, align 4
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @InitNonConst2(
				; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
				; CHECK: [[V:%.*]] = zext i32 %v to i64
				; CHECK: [[W:%.*]] = zext i32 %w to i64
				; CHECK: [[WS:%.*]] = shl i64 [[W]], 32
				; CHECK: [[VW:%.*]] = or i64 [[V]], [[WS]]
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 [[VW]], i64 0)
				; CHECK: ret void

				; Constant <2 x i32> vector stored at offset 0 of a 16-byte array. The
				; vector exactly fills the first 8-byte word, so the FileCheck directives
				; that follow expect it to appear as a single bitcast-to-i64 constant
				; expression in the first data operand, with 0 as the second.
				define void @InitVector() sanitize_memtag {
				entry:
				%x = alloca i32, i32 4, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
				; Reinterpret the start of the array as a 2-element vector slot.
				%1 = bitcast i32* %x to <2 x i32>*
				store <2 x i32> <i32 1, i32 2>, <2 x i32>* %1, align 4
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @InitVector(
				; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 0)
				; CHECK: ret void

				; Vector-of-pointers initializer: %p is inserted into lane 0 and then
				; broadcast to all four lanes with a zero shuffle mask, and the resulting
				; <4 x i32*> is stored into a stack slot of the same type. There are no
				; lifetime markers here, so the store directly follows the alloca.
				; The FileCheck directives that follow expect the vector to be converted
				; with ptrtoint to <4 x i64>, bitcast to i256, and split into four i64
				; words feeding two tag+store-pair calls.
				define void @InitVectorPtr(i32* %p) sanitize_memtag {
				entry:
				%s = alloca <4 x i32*>, align 8
				; Element type must be i32* to match both %p and the alloca'd vector type.
				%v0 = insertelement <4 x i32*> undef, i32* %p, i32 0
				; The shuffle mask is an integer vector; zeroinitializer selects lane 0
				; for every result lane (splat).
				%v1 = shufflevector <4 x i32*> %v0, <4 x i32*> undef, <4 x i32> zeroinitializer
				store <4 x i32*> %v1, <4 x i32*>* %s
				; %s is a pointer to the vector slot, so the cast source type is
				; <4 x i32*>*.
				%0 = bitcast <4 x i32*>* %s to i8*
				call void @use(i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @InitVectorPtr(
				; CHECK: call <4 x i32*>* @llvm.aarch64.tagp
				; CHECK: [[V1:%.*]] = shufflevector
				; CHECK: [[V2:%.*]] = ptrtoint <4 x i32*> [[V1]] to <4 x i64>
				; CHECK: [[V3:%.*]] = bitcast <4 x i64> [[V2]] to i256
				; CHECK: [[A1:%.*]] = trunc i256 [[V3]] to i64
				; CHECK: [[A2_:%.*]] = lshr i256 [[V3]], 64
				; CHECK: [[A2:%.*]] = trunc i256 [[A2_]] to i64
				; CHECK: [[A3_:%.*]] = lshr i256 [[V3]], 128
				; CHECK: [[A3:%.*]] = trunc i256 [[A3_]] to i64
				; CHECK: [[A4_:%.*]] = lshr i256 [[V3]], 192
				; CHECK: [[A4:%.*]] = trunc i256 [[A4_]] to i64
				; CHECK: call void @llvm.aarch64.stgp({{.*}}, i64 [[A1]], i64 [[A2]])
				; CHECK: call void @llvm.aarch64.stgp({{.*}}, i64 [[A3]], i64 [[A4]])
				; CHECK: ret void

				; Constant <2 x i32> vector stored at element 1 (byte offset 4) of a
				; 16-byte array, so the 8-byte value straddles the two 8-byte words. The
				; FileCheck directives that follow expect it to be split into a shl-by-32
				; constant expression for the first word and an lshr-by-32 constant
				; expression for the second.
				define void @InitVectorSplit() sanitize_memtag {
				entry:
				%x = alloca i32, i32 4, align 4
				%0 = bitcast i32* %x to i8*
				call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0)
				; Vector slot begins at byte 4 and covers bytes [4, 12).
				%1 = getelementptr i32, i32* %x, i32 1
				%2 = bitcast i32* %1 to <2 x i32>*
				store <2 x i32> <i32 1, i32 2>, <2 x i32>* %2, align 4
				call void @use(i8* nonnull %0)
				call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @InitVectorSplit(
				; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
				; CHECK: call void @llvm.aarch64.stgp(i8* [[TX8]], i64 shl (i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 32), i64 lshr (i64 bitcast (<2 x i32> <i32 1, i32 2> to i64), i64 32))
				; CHECK: ret void

				; Zero-fill of an entire 32-byte, 16-byte-aligned array via memset, with no
				; lifetime markers. The FileCheck directives that follow expect the memset
				; to be replaced by a single tag+zero call covering all 32 bytes.
				define void @MemSetZero() sanitize_memtag {
				entry:
				%x = alloca i32, i32 8, align 16
				%0 = bitcast i32* %x to i8*
				; Fill value 0, length 32: covers the whole allocation.
				call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 0, i64 32, i1 false)
				call void @use(i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @MemSetZero(
				; CHECK: [[TX:%.*]] = call i32* @llvm.aarch64.tagp
				; CHECK: [[TX8:%.*]] = bitcast i32* [[TX]] to i8*
				; CHECK: call void @llvm.aarch64.settag.zero(i8* [[TX8]], i64 32)
				; CHECK: ret void


				; Non-zero fill of an entire 32-byte, 16-byte-aligned array via memset.
				; The FileCheck directives that follow expect two tag+store-pair calls
				; whose data words are the fill byte repeated eight times:
				; 0x2a2a2a2a2a2a2a2a == 3038287259199220266.
				define void @MemSetNonZero() sanitize_memtag {
				entry:
				%x = alloca i32, i32 8, align 16
				%0 = bitcast i32* %x to i8*
				; Fill value 42 (0x2a), length 32: covers the whole allocation.
				call void @llvm.memset.p0i8.i64(i8* nonnull align 16 %0, i8 42, i64 32, i1 false)
				call void @use(i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @MemSetNonZero(
				; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 3038287259199220266)
				; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 3038287259199220266)
				; CHECK: ret void


				; Non-zero memset that covers only the middle of a 32-byte array: 28 bytes
				; starting at byte 2, i.e. bytes [2, 30). The FileCheck directives that
				; follow expect the first data word to have its two lowest bytes clear
				; (0x2a2a2a2a2a2a0000 == 3038287259199209472) and the last data word to
				; have its two highest bytes clear (0x00002a2a2a2a2a2a == 46360584399402).
				define void @MemSetNonZero2() sanitize_memtag {
				entry:
				%x = alloca [32 x i8], align 16
				; Pointer to byte 2 of the array; also the pointer handed to @use below.
				%0 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 2
				call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 42, i64 28, i1 false)
				call void @use(i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @MemSetNonZero2(
				; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199209472, i64 3038287259199220266)
				; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 3038287259199220266, i64 46360584399402)
				; CHECK: ret void

				; Two disjoint non-zero memsets into a 32-byte array: 4 bytes at offset 2
				; (bytes [2, 6)) and 8 bytes at offset 24 (bytes [24, 32)). The FileCheck
				; directives that follow expect 0x00002a2a2a2a0000 == 46360584388608 in the
				; first data word of the first chunk, 0x2a2a2a2a2a2a2a2a in the second data
				; word of the second chunk, and 0 in the remaining words.
				define void @MemSetNonZero3() sanitize_memtag {
				entry:
				%x = alloca [32 x i8], align 16
				; First fill: bytes [2, 6). %0 is also the pointer handed to @use below.
				%0 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 2
				call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 42, i64 4, i1 false)
				; Second fill: bytes [24, 32), exactly the last 8-byte word.
				%1 = getelementptr inbounds [32 x i8], [32 x i8]* %x, i64 0, i64 24
				call void @llvm.memset.p0i8.i64(i8* nonnull %1, i8 42, i64 8, i1 false)
				call void @use(i8* nonnull %0)
				ret void
				}

				; CHECK-LABEL: define void @MemSetNonZero3(
				; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 46360584388608, i64 0)
				; CHECK: call void @llvm.aarch64.stgp(i8* {{.*}}, i64 0, i64 3038287259199220266)
				; CHECK: ret void