diff --git a/clang/test/CodeGen/msp430-struct-or-union-args.c b/clang/test/CodeGen/msp430-struct-or-union-args.c --- a/clang/test/CodeGen/msp430-struct-or-union-args.c +++ b/clang/test/CodeGen/msp430-struct-or-union-args.c @@ -1,5 +1,7 @@ // REQUIRES: msp430-registered-target -// RUN: %clang -target msp430 -fno-inline-functions -S -o- %s | FileCheck --check-prefixes=ASM %s +// Compile with optimization to check that some memcpy intrinsic invocations are optimized out. +// RUN: %clang -target msp430 -fno-inline-functions -S -Os -o- %s | FileCheck --check-prefixes=ASM %s +// Compile without optimization so that the output is not cluttered with deduced LLVM IR attributes. // RUN: %clang -target msp430 -fno-inline-functions -S -emit-llvm -o- %s | FileCheck --check-prefixes=IR %s #include @@ -89,3 +91,33 @@ // ASM-NEXT: ret middle_u(u); } + +// No need to create a temporary copy of the struct/union-typed argument +// if it is just passed to another function as-is. +// TODO: For now, this only works when the structure is larger than 8 bytes; +// otherwise the memcpy intrinsic will be replaced by InstCombiner. 
+ +struct LL { + long long a[2]; +}; + +extern struct LL ll; + +extern void leaf(struct LL x); + +void middle(struct LL x) { +// ASM: middle: +// No stack-allocated objects: +// ASM-NOT: r1 +// ASM: call #leaf +// ASM-NEXT: ret + leaf(x); +} + +void caller(void) { +// ASM: caller: +// ASM: mov #ll, r12 +// ASM-NEXT: call #middle +// ASM-NEXT: ret + middle(ll); +} diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -65,7 +65,7 @@ bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet); bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet); - bool processByValArgument(CallBase &CB, unsigned ArgNo); + bool processByValOrByRefArgument(CallBase &CB, unsigned ArgNo); Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1217,10 +1217,10 @@ return true; } -/// This is called on every byval argument in call sites. -bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { +/// This is called on every byval/byref argument in call sites. +bool MemCpyOptPass::processByValOrByRefArgument(CallBase &CB, unsigned ArgNo) { const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout(); - // Find out what feeds this byval argument. + // Find out what feeds this byval/byref argument. 
Value *ByValArg = CB.getArgOperand(ArgNo); Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); @@ -1287,7 +1287,7 @@ TmpCast = TmpBitCast; } - LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n" + LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval or byref:\n" << " " << *MDep << "\n" << " " << CB << "\n"); @@ -1328,8 +1328,8 @@ RepeatInstruction = processMemMove(M); else if (auto *CB = dyn_cast<CallBase>(I)) { for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) - if (CB->isByValArgument(i)) - MadeChange |= processByValArgument(*CB, i); + if (CB->isByValArgument(i) || CB->paramHasAttr(i, Attribute::ByRef)) + MadeChange |= processByValOrByRefArgument(*CB, i); } // Reprocess the instruction if desired.