Index: lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- lib/Transforms/Scalar/DeadStoreElimination.cpp +++ lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -768,6 +768,38 @@ if (DeadStackObjects.empty()) break; + // Check a corner case: when a tail call has a byval parameter, + // getModRefInfo report no mod ref for the call site. The reason + // is that a tail call cannot reference callers local memory nor + // can the caller own the byval of a tail call (which the caller + // by definiton of byval is supposed to do. But since the tail does not + // return getModRefInfo does not check that byval condition for tail + // calls). + // But DSE cannot remove a store to local memory when a byval argument of + // a tail call needs it. + if (const CallInst *CI = dyn_cast(CS.getInstruction())) + if (CI->isTailCall()) { + bool PassedAsArg = false; + unsigned OperandNo = 0; + for (auto Arg = CS.data_operands_begin(), CE = CS.data_operands_end(); + Arg != CE; ++Arg, ++OperandNo) { + if ((*Arg)->getType()->isPointerTy() && + CS.isByValArgument(OperandNo)) + if (std::find_if(DeadStackObjects.begin(), DeadStackObjects.end(), + [&](Value *I) { + // Does the call site touch the value? + AliasResult AR = AA->alias( + MemoryLocation(*Arg), + MemoryLocation(I, getPointerSize(I, DL, *TLI))); + return AR != NoAlias; + ; + })) + PassedAsArg = true; + } + if (PassedAsArg) + break; + } + continue; } Index: test/Transforms/DeadStoreElimination/tail-call-with-byval-param.ll =================================================================== --- /dev/null +++ test/Transforms/DeadStoreElimination/tail-call-with-byval-param.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -basicaa -dse -S | FileCheck %s +target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" +target triple = "i386-apple-macosx10.11.0" + +%class.IReproHeapDumb = type { i8 } +%class.CReproHeapDumbObject = type { i32 (...)**, %class.IReproHeapDumb* } +%class.IStreamDumb = type { i32 (...)** } +%class.CByteStreamWrapperBase = type { %class.CReproHeapDumbObject, i64, i32, [4 x i8] } +%class.CByteStreamToIStream = type { %class.CByteStreamWrapperBase.base, %class.IStreamDumb } +%class.CByteStreamWrapperBase.base = type { %class.CReproHeapDumbObject, i64, i32 } +%union._LARGE_INTEGER2 = type { %struct.anon } +%struct.anon = type { i32, i32 } + +declare void @llvm.dbg.declare(metadata, metadata, metadata) +declare void @llvm.dbg.value(metadata, i64) +declare i32 @_ZN20CByteStreamToIStream4SeekE15_LARGE_INTEGER2jPS0_(%class.CByteStreamToIStream* %this, %union._LARGE_INTEGER2* align 4, i32 %dwOrigin, %union._LARGE_INTEGER2* %plibNewPosition) + +define i32 +@_ZThn20_N20CByteStreamToIStream4SeekE15_LARGE_INTEGER2jPS0_(%class.CByteStreamToIStream* %this, %union._LARGE_INTEGER2* byval align 4, i32 %dwOrigin, %union._LARGE_INTEGER2* %plibNewPosition) { +entry: + %dlibMove = alloca %union._LARGE_INTEGER2, align 8 + %v1 = bitcast %union._LARGE_INTEGER2* %0 to i64* + %v2 = bitcast %union._LARGE_INTEGER2* %dlibMove to i64* + %v3 = load i64, i64* %v1, align 4 +; CHECK: store i64 %v3, i64* %v2 + store i64 %v3, i64* %v2, align 8 + call void @llvm.dbg.value(metadata %class.CByteStreamToIStream* %this, i64 0) + call void @llvm.dbg.value(metadata i32 %dwOrigin, i64 0) + call void @llvm.dbg.value(metadata %union._LARGE_INTEGER2* %plibNewPosition, i64 0) + %v4 = getelementptr inbounds %class.CByteStreamToIStream, %class.CByteStreamToIStream* %this, i32 -1, i32 0, i32 0, i32 1 + %v5 = bitcast %class.IReproHeapDumb** %v4 to %class.CByteStreamToIStream* + call void @llvm.dbg.value(metadata %union._LARGE_INTEGER2* %dlibMove, i64 0) + %call = tail call i32 @_ZN20CByteStreamToIStream4SeekE15_LARGE_INTEGER2jPS0_(%class.CByteStreamToIStream* %v5, %union._LARGE_INTEGER2* nonnull byval align 4 %dlibMove, i32 %dwOrigin, %union._LARGE_INTEGER2* %plibNewPosition) + ret i32 %call +}