This is an archive of the discontinued LLVM Phabricator instance.

Allow call-slop optzn for destinations with a suitable dereferenceable attribute
ClosedPublic

Authored by dotdash on Oct 16 2014, 12:22 PM.

Download Raw Diff

Details

Reviewers

Commits

rGd20816fde95c: Allow call-slop optzn for destinations with a suitable dereferenceable attribute
rL219950: Allow call-slop optzn for destinations with a suitable dereferenceable attribute

Summary

Currently, call slot optimization requires that if the destination is an
argument, the argument has the sret attribute. This is to ensure that
the memory access won't trap. In addition to sret, we can also allow the
optimization to happen for arguments that have the new dereferenceable
attribute, which gives the same guarantee.

Diff Detail

Repository: rL LLVM

Event Timeline

dotdash updated this revision to Diff 15038.Oct 16 2014, 12:22 PM

dotdash retitled this revision from to Allow call-slop optzn for destinations with a suitable dereferenceable attribute.

dotdash updated this object.

dotdash edited the test plan for this revision. (Show Details)

dotdash added a subscriber: Unknown Object (MLST).

hfinkel added a subscriber: hfinkel.Oct 16 2014, 12:36 PM

hfinkel added inline comments.

lib/Transforms/Scalar/MemCpyOptimizer.cpp
637 ↗	(On Diff #15038)	But if you do it this way you're now requiring the dereferenceable attribute, and we don't want to do that. What you want to do is keep the existing logic for sret, and just add additional support for dereferenceable.

dotdash added inline comments.Oct 16 2014, 12:43 PM

lib/Transforms/Scalar/MemCpyOptimizer.cpp
637 ↗	(On Diff #15038)	How so? The new check just skips the sret check if we already know that we have enough dereferenceable bytes. If there is no dereferenceable attribute, getDereferenceableBytes() returns 0, which is < srcSize and so we do the sret check, and only if that fails, false is returned.

LGTM, thanks!

lib/Transforms/Scalar/MemCpyOptimizer.cpp
637 ↗	(On Diff #15038)	You're right; sorry about that.

This revision is now accepted and ready to land.Oct 16 2014, 12:49 PM

Closed by commit rL219950 (authored by bsteinbr).

ebotcazou mentioned this in D58431: SanitizerCommon: fixes for unwinding & backtrace on SPARC.Mar 12 2019, 3:11 AM

Revision Contents

Path

Size

llvm/

trunk/

lib/

Transforms/

Scalar/

MemCpyOptimizer.cpp

30 lines

test/

Transforms/

MemCpyOpt/

callslot_deref.ll

29 lines

Diff 15040

llvm/trunk/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Show First 20 Lines • Show All 628 Lines • ▼ Show 20 Lines	if (!destArraySize)
return false;		return false;

uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) *		uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) *
destArraySize->getZExtValue();		destArraySize->getZExtValue();

if (destSize < srcSize)		if (destSize < srcSize)
return false;		return false;
} else if (Argument *A = dyn_cast<Argument>(cpyDest)) {		} else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
		if (A->getDereferenceableBytes() < srcSize) {
// If the destination is an sret parameter then only accesses that are		// If the destination is an sret parameter then only accesses that are
// outside of the returned struct type can trap.		// outside of the returned struct type can trap.
if (!A->hasStructRetAttr())		if (!A->hasStructRetAttr())
return false;		return false;

Type *StructTy = cast<PointerType>(A->getType())->getElementType();		Type *StructTy = cast<PointerType>(A->getType())->getElementType();
if (!StructTy->isSized()) {		if (!StructTy->isSized()) {
// The call may never return and hence the copy-instruction may never		// The call may never return and hence the copy-instruction may never
// be executed, and therefore it's not safe to say "the destination		// be executed, and therefore it's not safe to say "the destination
// has at least <cpyLen> bytes, as implied by the copy-instruction",		// has at least <cpyLen> bytes, as implied by the copy-instruction",
return false;		return false;
}		}

uint64_t destSize = DL->getTypeAllocSize(StructTy);		uint64_t destSize = DL->getTypeAllocSize(StructTy);
if (destSize < srcSize)		if (destSize < srcSize)
return false;		return false;
		}
} else {		} else {
return false;		return false;
}		}

// Check that dest points to memory that is at least as aligned as src.		// Check that dest points to memory that is at least as aligned as src.
unsigned srcAlign = srcAlloca->getAlignment();		unsigned srcAlign = srcAlloca->getAlignment();
if (!srcAlign)		if (!srcAlign)
srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType());		srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType());
▲ Show 20 Lines • Show All 424 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/MemCpyOpt/callslot_deref.ll

				; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
				target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"

				declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind
				declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind

				; All bytes of %dst that are touched by the memset are dereferenceable:
				; %dst is marked dereferenceable(4096) and both the memset and the memcpy
				; below operate on exactly 4096 bytes.
				; The FileCheck directives in this function require that a memset call is
				; still present in the output and that no memcpy call follows it.
				define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
				; CHECK-LABEL: @must_remove_memcpy(
				; CHECK: call void @llvm.memset.p0i8.i64
				; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
				; 4096-byte stack temporary; %p is an i8* to its first byte.
				%src = alloca [4096 x i8], align 1
				%p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
				; Zero-fill the temporary, then copy all 4096 bytes of it into %dst.
				call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
				call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
				ret void
				}

				; The memset touches more bytes than those guaranteed to be dereferenceable:
				; %dst is only marked dereferenceable(1024), while the memset and the memcpy
				; below operate on 4096 bytes.
				; The FileCheck directives in this function require that both the memset
				; call and the memcpy call are still present in the output.
				define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
				; CHECK-LABEL: @must_not_remove_memcpy(
				; CHECK: call void @llvm.memset.p0i8.i64
				; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
				; 4096-byte stack temporary; %p is an i8* to its first byte.
				%src = alloca [4096 x i8], align 1
				%p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
				; Zero-fill the temporary, then copy all 4096 bytes of it into %dst.
				call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
				call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
				ret void
				}