This is an archive of the discontinued LLVM Phabricator instance.

[ASAN] Support memory checks on scalable vector typed masked load and store
ClosedPublic

Authored by reames on Mar 2 2023, 3:05 PM.

Download Raw Diff

Details

Reviewers

craig.topper
asb
kito-cheng
usama54321
MaskRay

Commits

rG368cb421c396: [ASAN] Support memory checks on scalable vector typed masked load and store

Summary

This takes the approach of using the loop based formation for scalable vectors only. We could potentially use the loop form for fixed vectors as well, but we'd lose the unroll and specialize on constant vector logic which is already present. I don't have a strong opinion on whether the existing logic is worthwhile; I kept it mostly to minimize test churn.

Worth noting is that there is a better lowering available. The plain vector lowering appears to check only the first and last byte. By analogy, we should be able to check only the first active and last active byte in the masked op. This is a more invasive change to asan, and I decided simply supporting scalable vectors at all was a better starting place.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

reames created this revision.Mar 2 2023, 3:05 PM

Herald added a project: Restricted Project. · View Herald TranscriptMar 2 2023, 3:05 PM

Herald added subscribers: Enna1, bollu, hiraditya, mcrosier. · View Herald Transcript

reames requested review of this revision.Mar 2 2023, 3:05 PM

Herald added a project: Restricted Project. · View Herald TranscriptMar 2 2023, 3:05 PM

Herald added a subscriber: alextsao1999. · View Herald Transcript

reames added a parent revision: D145175: [ASAN] Support memory checks on scalable vector typed loads and stores.Mar 2 2023, 3:05 PM

Harbormaster completed remote builds in B217073: Diff 502004.Mar 2 2023, 4:27 PM

The changes look reasonable and good to me, but I am also new to ASan, so I guess we need a few more eyes to review.

In D145198#4173828, @kito-cheng wrote:

The changes look reasonable and good to me, but I am also new to ASan, so I guess we need a few more eyes to review.

Given lack of other activity on the review, I'm going to wait another day or so, and then land.

craig.topper added inline comments.Mar 10 2023, 8:08 AM

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
1524	Can it be a FixedVectorType here?

This revision was not accepted when it landed; it landed in state Needs Review.Mar 10 2023, 4:20 PM

This revision was landed with ongoing or failed builds.

Closed by commit rG368cb421c396: [ASAN] Support memory checks on scalable vector typed masked load and store (authored by reames). · Explain Why

This revision was automatically updated to reflect the committed changes.

reames added a commit: rG368cb421c396: [ASAN] Support memory checks on scalable vector typed masked load and store.

kito-cheng mentioned this in D146208: [ASAN] Support memory checks on vp.load/store..Apr 28 2023, 1:30 AM

Revision Contents

Path

Size

llvm/

lib/

Transforms/

Instrumentation/

AddressSanitizer.cpp

110 lines

test/

Instrumentation/

AddressSanitizer/

asan-masked-load-store.ll

65 lines

Diff 504301

llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp

Show First 20 Lines • Show All 1,433 Lines • ▼ Show 20 Lines	case 128:
return Pass->instrumentAddress(I, InsertBefore, Addr, FixedSize,		return Pass->instrumentAddress(I, InsertBefore, Addr, FixedSize,
IsWrite, nullptr, UseCalls, Exp);		IsWrite, nullptr, UseCalls, Exp);
}		}
}		}
Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeStoreSize,		Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeStoreSize,
IsWrite, nullptr, UseCalls, Exp);		IsWrite, nullptr, UseCalls, Exp);
}		}

		/// Split the block containing \p SplitBefore and insert a simple counted loop
		/// immediately before \p SplitBefore.
		///
		/// The resulting control flow is:
		///   LoopPred -> LoopBody -> LoopExit   (LoopBody also branches back to itself)
		/// where LoopExit begins with \p SplitBefore.
		///
		/// The induction variable starts at 0 and is incremented by 1 per iteration;
		/// the loop exits once the incremented value equals \p End. Because the exit
		/// test runs after the increment, the body always executes at least once, so
		/// \p End must not be zero.
		///
		/// \param End         Trip count; its type is used for the induction variable.
		/// \param SplitBefore Instruction before which the loop is inserted.
		/// \param BodyIP      [out] First non-PHI instruction of the loop body, i.e.
		///                    where the caller should insert the per-iteration code.
		/// \param Index       [out] The induction variable PHI.
		static void SplitBlockAndInsertSimpleForLoop(Value *End,
		                                             Instruction *SplitBefore,
		                                             Instruction *&BodyIP,
		                                             Value *&Index) {
		  // Two successive splits at the same instruction: the first creates the
		  // (initially empty) loop body, the second peels SplitBefore and everything
		  // after it into the exit block.
		  BasicBlock *LoopPred = SplitBefore->getParent();
		  BasicBlock *LoopBody = SplitBlock(LoopPred, SplitBefore);
		  BasicBlock *LoopExit = SplitBlock(LoopBody, SplitBefore);

		  auto *Ty = End->getType();
		  auto &DL = SplitBefore->getModule()->getDataLayout();
		  const unsigned Bitwidth = DL.getTypeSizeInBits(Ty);

		  // Build the IV, its increment and the exit test in front of the
		  // unconditional branch that SplitBlock left in the body.
		  IRBuilder<> Builder(LoopBody->getTerminator());
		  auto *IV = Builder.CreatePHI(Ty, 2, "iv");
		  // NOTE(review): NSW is withheld only for 2-bit induction types; the
		  // rationale is not stated here -- confirm before changing.
		  auto *IVNext =
		      Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
		                        /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
		  auto *IVCheck = Builder.CreateICmpEQ(IVNext, End,
		                                       IV->getName() + ".check");
		  Builder.CreateCondBr(IVCheck, LoopExit, LoopBody);
		  // The conditional branch was inserted before the old unconditional branch,
		  // which is therefore still the block's last instruction; drop it.
		  LoopBody->getTerminator()->eraseFromParent();

		  // Populate the IV PHI.
		  IV->addIncoming(ConstantInt::get(Ty, 0), LoopPred);
		  IV->addIncoming(IVNext, LoopBody);

		  BodyIP = LoopBody->getFirstNonPHI();
		  Index = IV;
		}


static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,		static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
const DataLayout &DL, Type *IntptrTy,		const DataLayout &DL, Type *IntptrTy,
Value *Mask, Instruction *I,		Value *Mask, Instruction *I,
Value *Addr, MaybeAlign Alignment,		Value *Addr, MaybeAlign Alignment,
unsigned Granularity, Type *OpType,		unsigned Granularity, Type *OpType,
bool IsWrite, Value *SizeArgument,		bool IsWrite, Value *SizeArgument,
bool UseCalls, uint32_t Exp) {		bool UseCalls, uint32_t Exp) {
auto *VTy = cast<FixedVectorType>(OpType);		auto *VTy = cast<VectorType>(OpType);
uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
unsigned Num = VTy->getNumElements();		TypeSize ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
auto Zero = ConstantInt::get(IntptrTy, 0);		auto Zero = ConstantInt::get(IntptrTy, 0);

		// For fixed length vectors, it's legal to fallthrough into the generic loop
		// lowering below, but we chose to unroll and specialize instead. We might want
		// to revisit this heuristic decision.
		if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
		unsigned Num = FVTy->getNumElements();
for (unsigned Idx = 0; Idx < Num; ++Idx) {		for (unsigned Idx = 0; Idx < Num; ++Idx) {
Value *InstrumentedAddress = nullptr;		Value *InstrumentedAddress = nullptr;
Instruction *InsertBefore = I;		Instruction *InsertBefore = I;
if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {		if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
// dyn_cast as we might get UndefValue		// dyn_cast as we might get UndefValue
if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {		if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
if (Masked->isZero())		if (Masked->isZero())
// Mask is constant false, so no instrumentation needed.		// Mask is constant false, so no instrumentation needed.
continue;		continue;
// If we have a true or undef value, fall through to doInstrumentAddress		// If we have a true or undef value, fall through to doInstrumentAddress
// with InsertBefore == I		// with InsertBefore == I
}		}
} else {		} else {
IRBuilder<> IRB(I);		IRBuilder<> IRB(I);
Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);		Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);		Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
InsertBefore = ThenTerm;		InsertBefore = ThenTerm;
}		}

IRBuilder<> IRB(InsertBefore);		IRBuilder<> IRB(InsertBefore);
InstrumentedAddress =		InstrumentedAddress =
IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});		IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,		doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
Granularity, TypeSize::Fixed(ElemTypeSize), IsWrite,		Granularity, ElemTypeSize, IsWrite,
SizeArgument, UseCalls, Exp);		SizeArgument, UseCalls, Exp);
}		}
		return;
		}


		IRBuilder<> IRB(I);
		Constant *MinNumElem =
		ConstantInt::get(IntptrTy, VTy->getElementCount().getKnownMinValue());
		assert(isa<ScalableVectorType>(VTy) && "generalize if reused for fixed length");
		Value *NumElements = IRB.CreateVScale(MinNumElem);
		[Inline comment, not done] craig.topper: Can it be a FixedVectorType here?

		Instruction *BodyIP;
		Value *Index;
		SplitBlockAndInsertSimpleForLoop(NumElements, I, BodyIP, Index);

		IRB.SetInsertPoint(BodyIP);
		Value *MaskElem = IRB.CreateExtractElement(Mask, Index);
		Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, BodyIP, false);
		IRB.SetInsertPoint(ThenTerm);

		Value *InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index});
		doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(), InstrumentedAddress, Alignment,
		Granularity, ElemTypeSize, IsWrite, SizeArgument,
		UseCalls, Exp);
}		}

void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,		void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
InterestingMemoryOperand &O, bool UseCalls,		InterestingMemoryOperand &O, bool UseCalls,
const DataLayout &DL) {		const DataLayout &DL) {
Value *Addr = O.getPtr();		Value *Addr = O.getPtr();

// Optimization experiments.		// Optimization experiments.
▲ Show 20 Lines • Show All 2,028 Lines • Show Last 20 Lines

llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll

	Show First 20 Lines • Show All 302 Lines • ▼ Show 20 Lines
	; DISABLED-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16			; DISABLED-NEXT: [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
	; DISABLED-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> [[ARG:%.*]])			; DISABLED-NEXT: [[RES2:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> [[ARG:%.*]])
	; DISABLED-NEXT: ret <4 x float> [[RES2]]			; DISABLED-NEXT: ret <4 x float> [[RES2]]
	;			;
	%res = load <4 x float>, ptr %p			%res = load <4 x float>, ptr %p
	%res2 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %arg)			%res2 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %arg)
	ret <4 x float> %res2			ret <4 x float> %res2
	}			}

				;; Scalable vector tests
				;; ---------------------------
				declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
				declare void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float>, ptr, i32, <vscale x 4 x i1>)

				define <vscale x 4 x float> @scalable.load.nxv4f32(ptr %p, <vscale x 4 x i1> %mask) sanitize_address {
				; CHECK-LABEL: @scalable.load.nxv4f32(
				; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
				; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
				; CHECK: .split:
				; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ]
				; CHECK-NEXT: [[TMP3:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
				; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP7]]
				; CHECK: 4:
				; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
				; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
				; CHECK-NEXT: call void @__asan_load4(i64 [[TMP6]])
				; CHECK-NEXT: br label [[TMP7]]
				; CHECK: 7:
				; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
				; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]]
				; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
				; CHECK: .split.split:
				; CHECK-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[P]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> undef)
				; CHECK-NEXT: ret <vscale x 4 x float> [[RES]]
				;
				; DISABLED-LABEL: @scalable.load.nxv4f32(
				; DISABLED-NEXT: [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[P:%.*]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x float> undef)
				; DISABLED-NEXT: ret <vscale x 4 x float> [[RES]]
				;
				%res = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %p, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> undef)
				ret <vscale x 4 x float> %res
				}

				define void @scalable.store.nxv4f32(ptr %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask) sanitize_address {
				; CHECK-LABEL: @scalable.store.nxv4f32(
				; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
				; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
				; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
				; CHECK: .split:
				; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ]
				; CHECK-NEXT: [[TMP3:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
				; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP7]]
				; CHECK: 4:
				; CHECK-NEXT: [[TMP5:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
				; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
				; CHECK-NEXT: call void @__asan_store4(i64 [[TMP6]])
				; CHECK-NEXT: br label [[TMP7]]
				; CHECK: 7:
				; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
				; CHECK-NEXT: [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]]
				; CHECK-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
				; CHECK: .split.split:
				; CHECK-NEXT: tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], i32 4, <vscale x 4 x i1> [[MASK]])
				; CHECK-NEXT: ret void
				;
				; DISABLED-LABEL: @scalable.store.nxv4f32(
				; DISABLED-NEXT: tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], i32 4, <vscale x 4 x i1> [[MASK:%.*]])
				; DISABLED-NEXT: ret void
				;
				tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %arg, ptr %p, i32 4, <vscale x 4 x i1> %mask)
				ret void
				}