Index: llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -73,6 +73,7 @@
   bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet,
                                   BatchAAResults &BAA);
   bool processByValArgument(CallBase &CB, unsigned ArgNo);
+  bool processImmutArgument(CallBase &CB, unsigned ArgNo);
   Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
                                     Value *ByteVal);
   bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1613,6 +1613,106 @@
   return true;
 }
 
+/// Try to forward the source of a memcpy directly to a call argument that the
+/// call only reads, making the temporary copy dead.
+///
+/// The caller invokes this for non-byval arguments for which
+/// CB.onlyReadsMemory(ArgNo) holds. The argument is rewritten only if:
+/// 1. The memcpy dst is neither modified nor captured by the call (`noalias`,
+///    `readonly` and `nocapture` on the call site).
+/// 2. The pointer passed to the call is an alloca with known alignment and a
+///    known, fixed size, and it is the dst of a non-volatile memcpy.
+///    2-1. The memcpy size == the alloca size, which ensures that the new
+///         pointer is dereferenceable for the required range.
+///    2-2. The src pointer has alignment >= the alloca alignment, or can be
+///         enforced to have it.
+/// 3. The memcpy dst and src are not modified between the memcpy and the
+///    call (checked through MemorySSA).
+/// 4. The memcpy src is not modified during the call (checked through alias
+///    analysis).
+///
+/// \returns true if the argument operand of \p CB was replaced.
+bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
+  // 1. Ensure the passed argument is immutable during the call.
+  if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
+        CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
+    return false;
+  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
+  Value *ImmutArg = CB.getArgOperand(ArgNo);
+
+  // 2. Check that the argument is an alloca.
+  // TODO: Even if the arg gets back to branches, we can remove the memcpy if
+  // all the alloca alignments can be enforced to the source alignment.
+  auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
+  if (!AI)
+    return false;
+
+  // Can't handle an alloca of unknown size (e.g. a variable length array or a
+  // scalable vector): the comparison against the memcpy length below needs a
+  // fixed byte count.
+  std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
+  if (!AllocaSize || AllocaSize->isScalable())
+    return false;
+  MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
+  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+  if (!CallAccess)
+    return false;
+
+  // Find the access that clobbers the alloca before the call.
+  MemCpyInst *MDep = nullptr;
+  BatchAAResults BAA(*AA);
+  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+      CallAccess->getDefiningAccess(), Loc, BAA);
+  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+
+  // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
+  // a memcpy, check that the alloca is the memcpy dest.
+  if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
+    return false;
+
+  // The address space of the memcpy source must match the immut argument.
+  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
+      ImmutArg->getType()->getPointerAddressSpace())
+    return false;
+
+  // 2-1. The length of the memcpy must be equal to the size of the alloca.
+  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+  if (!MDepLen || AllocaSize->getFixedValue() != MDepLen->getZExtValue())
+    return false;
+
+  // 2-2. The memcpy source alignment must be at least the alloca's alignment.
+  // If it is not known to be, check whether the source can be forced to the
+  // alignment we need. If that fails, bail out.
+  MaybeAlign MemDepAlign = MDep->getSourceAlign();
+  Align AllocaAlign = AI->getAlign();
+  if ((!MemDepAlign || *MemDepAlign < AllocaAlign) &&
+      getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
+                                 DT) < AllocaAlign)
+    return false;
+
+  // 3./4. Verify that the source doesn't change in between the memcpy and the
+  // call, nor during the call:
+  //    memcpy(a <- b)
+  //    *b = 42;
+  //    foo(*a)
+  // It would be invalid to transform the call into foo(*b).
+  MemoryLocation SrcLoc = MemoryLocation::getForSource(MDep);
+  if (writtenBetween(MSSA, BAA, SrcLoc, MSSA->getMemoryAccess(MDep),
+                     CallAccess) ||
+      isModSet(AA->getModRefInfo(&CB, SrcLoc)))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
+                    << " " << *MDep << "\n"
+                    << " " << CB << "\n");
+
+  // Otherwise we're good! Update the immut argument.
+  CB.setArgOperand(ArgNo, MDep->getSource());
+  ++NumMemCpyInstr;
+  return true;
+}
+
 /// Executes one iteration of MemCpyOptPass.
 bool MemCpyOptPass::iterateOnFunction(Function &F) {
   bool MadeChange = false;
@@ -1641,9 +1741,14 @@
       else if (auto *M = dyn_cast<MemMoveInst>(I))
         RepeatInstruction = processMemMove(M);
       else if (auto *CB = dyn_cast<CallBase>(I)) {
-        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
+        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
           if (CB->isByValArgument(i))
             MadeChange |= processByValArgument(*CB, i);
+          // TODO: this can be also decided by MemSSA analysis
+          else if (CB->onlyReadsMemory(i)) {
+            MadeChange |= processImmutArgument(*CB, i);
+          }
+        }
       }
 
       // Reprocess the instruction if desired.
Index: llvm/test/Transforms/MemCpyOpt/memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -397,13 +397,9 @@ declare void @f_full_readnone(ptr nocapture noalias readnone) declare void @f_full_memory_none(ptr nocapture noalias) memory(none) -; TODO: remove memcpy, which is guaranteed to be invariant -; before and after the call because of its attributes. define void @immut_param(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL1]]) +; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -412,12 +408,9 @@ ret void } -; TODO: remove memcpy define void @immut_param_readonly(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_readonly( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL1]]) +; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -426,11 +419,9 @@ ret void } -; TODO: remove memcpy define void @immut_param_readnone(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_readnone( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @f_full_readnone(ptr align 4 [[VAL1]]) +; CHECK-NEXT: call void @f_full_readnone(ptr align 4 [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -439,7 +430,6 @@ ret void } -; TODO: remove memcpy define void @immut_param_memory_none(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_memory_none( ; CHECK-NEXT: [[VAL1:%.*]] 
= alloca i8, align 4 @@ -452,12 +442,9 @@ ret void } -; TODO: remove memcpy define void @immut_param_no_align(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_no_align( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]]) +; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -481,7 +468,7 @@ ret void } -; can't remove memcpy because src is modified between call and memcpy +; Can't remove memcpy because src is modified between call and memcpy define void @immut_param_modified(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_modified( ; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 @@ -497,8 +484,7 @@ ret void } -; can't remove memcpy if we remove, the alignment assumption would break -; can't remove memcpy if we remove, the alignment assumption would break +; Can't remove memcpy if we remove, the alignment assumption would break define void @immut_param_bigger_align(ptr align 16 noalias %val) { ; CHECK-LABEL: @immut_param_bigger_align( ; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 @@ -512,7 +498,7 @@ ret void } -; can't remove memcpy, because if the %val directly passed to @f, +; Can't remove memcpy, because if the %val directly passed to @f, ; alignment of ptr to f's argument will be different. define void @immut_invalid_align_branched(i1 %c, ptr noalias %val) { ; CHECK-LABEL: @immut_invalid_align_branched( @@ -531,7 +517,7 @@ ret void } -; can't remove memcpy, because alias might modify the src. +; Can't remove memcpy, because alias might modify the src. 
define void @immut_but_alias_src(i1 %c, ptr %val) { ; CHECK-LABEL: @immut_but_alias_src( ; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 @@ -545,14 +531,11 @@ ret void } -; TODO: remove memcpy define void @immut_unescaped_alloca(i1 %c) { ; CHECK-LABEL: @immut_unescaped_alloca( ; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4 ; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4 -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL]], i64 1, i1 false) -; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL1]]) +; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL]]) ; CHECK-NEXT: ret void ; %val = alloca i8, align 4 @@ -563,7 +546,7 @@ ret void } -; can't remove memcpy, because alloca src is modified +; Can't remove memcpy, because alloca src is modified define void @immut_unescaped_alloca_modified(i1 %c) { ; CHECK-LABEL: @immut_unescaped_alloca_modified( ; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4