diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -73,6 +73,7 @@ bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet, BatchAAResults &BAA); bool processByValArgument(CallBase &CB, unsigned ArgNo); + bool processImmutArgument(CallBase &CB, unsigned ArgNo); Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1604,6 +1604,101 @@ return true; } +/// This is called on memcpy dest pointer arguments attributed as immutable +/// during call. Try to use memcpy source directly if all of the following +/// conditions are satisfied. +/// 1. The memcpy dst is neither modified during the call nor captured by the +/// call. (if readonly, noalias, nocapture attributes on call-site.) +/// 2. The memcpy dst is an alloca with known alignment & size. +/// 2-1. The memcpy length == the alloca size which ensures that the new +/// pointer is dereferenceable for the required range +/// 2-2. The src pointer has alignment >= the alloca alignment or can be +/// enforced so. +/// 3. The memcpy dst and src are not modified between the memcpy and the call. +/// (if MSSA clobber check is safe.) +/// 4. The memcpy src is not modified during the call. (ModRef check shows no +/// Mod.) +bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) { + // 1. Ensure passed argument is immutable during call.
+ if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) && + CB.paramHasAttr(ArgNo, Attribute::NoCapture))) + return false; + const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout(); + Value *ImmutArg = CB.getArgOperand(ArgNo); + + // 2. Check that arg is alloca + // TODO: Even if the arg gets back to branches, we can remove memcpy if all + // the alloca alignments can be enforced to source alignment. + auto *AI = dyn_cast(ImmutArg->stripPointerCasts()); + if (!AI) + return false; + + std::optional AllocaSize = AI->getAllocationSize(DL); + // Can't handle unknown size alloca. + // (e.g. Variable Length Array, Scalable Vector) + if (!AllocaSize || AllocaSize->isScalable()) + return false; + MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize)); + MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB); + if (!CallAccess) + return false; + + MemCpyInst *MDep = nullptr; + BatchAAResults BAA(*AA); + MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess( + CallAccess->getDefiningAccess(), Loc, BAA); + if (auto *MD = dyn_cast(Clobber)) + MDep = dyn_cast_or_null(MD->getMemoryInst()); + + // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by + // a memcpy, check that the arg equals the memcpy dest. + if (!MDep || MDep->isVolatile() || AI != MDep->getDest()) + return false; + + // The address space of the memcpy source must match the immut argument + if (MDep->getSource()->getType()->getPointerAddressSpace() != + ImmutArg->getType()->getPointerAddressSpace()) + return false; + + // 2-1. The length of the memcpy must be equal to the size of the alloca. + auto *MDepLen = dyn_cast(MDep->getLength()); + if (!MDepLen || AllocaSize != MDepLen->getValue()) + return false; + + // 2-2. the memcpy source align must be larger than or equal to the alloca's + // align. If not so, we check to see if we can force the source of the memcpy + // to the alignment we need. If we fail, we bail out.
+ Align MemDepAlign = MDep->getSourceAlign().valueOrOne(); + Align AllocaAlign = AI->getAlign(); + if (MemDepAlign < AllocaAlign && + getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC, + DT) < AllocaAlign) + return false; + + // 3. Verify that the source doesn't change in between the memcpy and + // the call. + // memcpy(a <- b) + // *b = 42; + // foo(*a) + // It would be invalid to transform the call into foo(*b). + if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep), + MSSA->getMemoryAccess(MDep), CallAccess)) + return false; + + // 4. The memcpy src must not be modified during the call. + if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep)))) + return false; + + LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n" + << " " << *MDep << "\n" + << " " << CB << "\n"); + + // Otherwise we're good! Update the immut argument. + CB.setArgOperand(ArgNo, MDep->getSource()); + ++NumMemCpyInstr; + return true; +} + /// Executes one iteration of MemCpyOptPass. bool MemCpyOptPass::iterateOnFunction(Function &F) { bool MadeChange = false; @@ -1632,9 +1727,12 @@ else if (auto *M = dyn_cast(I)) RepeatInstruction = processMemMove(M); else if (auto *CB = dyn_cast(I)) { - for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) + for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) { if (CB->isByValArgument(i)) MadeChange |= processByValArgument(*CB, i); + else if (CB->onlyReadsMemory(i)) + MadeChange |= processImmutArgument(*CB, i); + } } // Reprocess the instruction if desired. diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll --- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -395,13 +395,9 @@ declare void @f(ptr) declare void @f_full_readonly(ptr nocapture noalias readonly) -; TODO: Remove memcpy, which is guaranteed to be invariant -; before and after the call because of its attributes.
define void @immut_param(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL1]]) +; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -452,12 +448,9 @@ ret void } -; TODO: Remove memcpy define void @immut_param_readonly(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_readonly( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL1]]) +; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -466,12 +459,9 @@ ret void } -; TODO: Remove memcpy define void @immut_param_no_align(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_no_align( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]]) +; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -580,12 +570,9 @@ ret void } -; TODO: remove memcpy define void @immut_param_bigger_align(ptr align 16 noalias %val) { ; CHECK-LABEL: @immut_param_bigger_align( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]]) +; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -608,14 +595,11 @@ ret 
void } -; TODO: remove memcpy. define void @immut_param_enforced_alignment() { ; CHECK-LABEL: @immut_param_enforced_alignment( -; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4 ; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4 -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[VAL1]], ptr [[VAL]], i64 1, i1 false) -; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]]) +; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL]]) ; CHECK-NEXT: ret void ; %val = alloca i8, align 1 @@ -659,14 +643,11 @@ ret void } -; TODO: remove memcpy define void @immut_unescaped_alloca() { ; CHECK-LABEL: @immut_unescaped_alloca( ; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4 ; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4 -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL]], i64 1, i1 false) -; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL1]]) +; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL]]) ; CHECK-NEXT: ret void ; %val = alloca i8, align 4