diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1563,6 +1563,12 @@ return paramHasAttr(ArgNo, Attribute::ByVal); } + /// Determine whether this argument is passed via pointer annotated with + /// `byref` attribute. + bool isByRefArgument(unsigned ArgNo) const { + return paramHasAttr(ArgNo, Attribute::ByRef); + } + /// Determine whether this argument is passed in an alloca. bool isInAllocaArgument(unsigned ArgNo) const { return paramHasAttr(ArgNo, Attribute::InAlloca); diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -65,7 +65,7 @@ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet); bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet); - bool processByValArgument(CallBase &CB, unsigned ArgNo); + bool processByValOrByRefArgument(CallBase &CB, unsigned ArgNo); Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1217,10 +1217,10 @@ return true; } -/// This is called on every byval argument in call sites. -bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { +/// This is called on every byval/byref argument in call sites. +bool MemCpyOptPass::processByValOrByRefArgument(CallBase &CB, unsigned ArgNo) { const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout(); - // Find out what feeds this byval argument. + // Find out what feeds this byval/byref argument. Value *ByValArg = CB.getArgOperand(ArgNo); Type *ByValTy = cast(ByValArg->getType())->getElementType(); uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); @@ -1238,6 +1238,23 @@ ByValArg->stripPointerCasts() != MDep->getDest()) return false; + // ByRef arguments should be handled with care: they are not implicitly copied + // to the calling function's stack frame, so they may be accessible from other + // threads, coroutines, etc. On the other hand, there is a special case: just + // passing through an argument down the stack like this: + // void leaf(struct X /* byref(%struct.X) */); + // void intermediate(struct X a /* byref(%struct.X) */) { + // /* prologue: `a` is implicitly copied by clang in the callee frame */ + // leaf(a); + // } + // So, checking whether the memcpy() source is byref argument on itself. + if (CB.isByRefArgument(ArgNo)) { + Argument *SourceAsArgument = + dyn_cast(MDep->getSource()->stripPointerCasts()); + if (!SourceAsArgument || !SourceAsArgument->hasByRefAttr()) + return false; + } + // The length of the memcpy must be larger or equal to the size of the byval. ConstantInt *C1 = dyn_cast(MDep->getLength()); if (!C1 || C1->getValue().getZExtValue() < ByValSize) @@ -1287,7 +1304,7 @@ TmpCast = TmpBitCast; } - LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n" + LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval or byref:\n" << " " << *MDep << "\n" << " " << CB << "\n"); @@ -1327,9 +1344,10 @@ else if (MemMoveInst *M = dyn_cast(I)) RepeatInstruction = processMemMove(M); else if (auto *CB = dyn_cast(I)) { - for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) - if (CB->isByValArgument(i)) - MadeChange |= processByValArgument(*CB, i); + for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) { + if (CB->isByValArgument(i) || CB->isByRefArgument(i)) + MadeChange |= processByValOrByRefArgument(*CB, i); + } } // Reprocess the instruction if desired. diff --git a/llvm/test/Transforms/MemCpyOpt/byref-memcpy.ll b/llvm/test/Transforms/MemCpyOpt/byref-memcpy.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/byref-memcpy.ll @@ -0,0 +1,45 @@ +; RUN: opt -memcpyopt -dse -S < %s | FileCheck %s + +%struct.S = type { i32 } + +declare void @llvm.memcpy.p0i8.p0i8.i16(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i16, i1 immarg) + +; A caller of this function has to arrange everything for the pointer it passes +; to be usable with byref attrbiute anyway, so just pass the pointer through. +define void @function_with_byref_arg(%struct.S* byref(%struct.S) align 4 %0) { +; CHECK: define void @function_with_byref_arg{{.*}}byref(%struct.S){{.*}} { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @leaf +; CHECK-NEXT: ret void +; CHECK-NEXT: } +entry: + %x = alloca %struct.S, align 4 + %1 = bitcast %struct.S* %x to i8* + %2 = bitcast %struct.S* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i16(i8* align 4 %1, i8* align 4 %2, i16 16, i1 false) + call void @leaf(%struct.S* byref(%struct.S) align 4 %x) + ret void +} + +; A generic pointer may point to memory being modified by other threads, +; coroutines, etc. during execution of @leaf(). +define void @function_with_pointer_arg(%struct.S* align 4 %0) { +; CHECK: define void @function_with_pointer_arg(%struct.S* align 4 [[V0:%.+]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.+]] = alloca %struct.S, align 4 +; CHECK-NEXT: [[V1:%.+]] = bitcast %struct.S* [[ALLOCA]] to i8* +; CHECK-NEXT: [[V2:%.+]] = bitcast %struct.S* [[V0]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i16(i8* align 4 %1, i8* align 4 %2, i16 16, i1 false) +; CHECK-NEXT: call void @leaf(%struct.S* byref(%struct.S) align 4 [[ALLOCA]]) +; CHECK-NEXT: ret void +; CHECK-NEXT: } +entry: + %x = alloca %struct.S, align 4 + %1 = bitcast %struct.S* %x to i8* + %2 = bitcast %struct.S* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i16(i8* align 4 %1, i8* align 4 %2, i16 16, i1 false) + call void @leaf(%struct.S* byref(%struct.S) align 4 %x) + ret void +} + +declare void @leaf(%struct.S* byref(%struct.S) align 4)