Index: llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -73,6 +73,7 @@
   bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet,
                                   BatchAAResults &BAA);
   bool processByValArgument(CallBase &CB, unsigned ArgNo);
+  bool processImmutArgument(CallBase &CB, unsigned ArgNo);
   Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
                                     Value *ByteVal);
   bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1613,6 +1613,106 @@
   return true;
 }
 
+/// Forwards the source of a memcpy into a call argument that is immutable
+/// for the duration of the call, so the temporary copy is no longer read.
+/// The caller is expected to have checked that the argument is read-only,
+/// noalias and nocapture at the call site. The rewrite is performed only if:
+///  - the argument is an alloca of known size;
+///  - the nearest clobber of that alloca before the call is a non-volatile
+///    memcpy that writes at least the whole alloca;
+///  - the memcpy source and the argument share an address space;
+///  - the call does not modify the memcpy source;
+///  - the memcpy source is not written between the memcpy and the call;
+///  - the memcpy source can be given the alignment the call site requires.
+/// \returns true if the call argument was replaced by the memcpy source.
+bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
+  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
+  Value *ImmutArg = CB.getArgOperand(ArgNo);
+  // FIXME: Requiring ImmutArg to be an alloca is sufficient but not necessary.
+  // FIXME: Do other pointer arguments need to be restricted? Possibly.
+  auto *AI = dyn_cast<AllocaInst>(ImmutArg);
+  if (!AI)
+    return false;
+  std::optional<TypeSize> ImmutSize = AI->getAllocationSize(DL);
+  // Can't handle an alloca of unknown size, e.g. a VLA.
+  if (!ImmutSize)
+    return false;
+  MemoryLocation Loc(ImmutArg, LocationSize::precise(*ImmutSize));
+  // FIXME?: A MemoryUse may be enough since the call does not modify the arg.
+  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+  if (!CallAccess)
+    return false;
+  MemCpyInst *MDep = nullptr;
+  BatchAAResults BAA(*AA);
+  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+      CallAccess->getDefiningAccess(), Loc, BAA);
+  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+
+  // If the argument isn't fed by a memcpy, ignore it. If it is fed by a
+  // memcpy, see if we can pass the source of the memcpy instead of the
+  // result.
+  if (!MDep || MDep->isVolatile() ||
+      ImmutArg->stripPointerCasts() != MDep->getDest())
+    return false;
+
+  // The length of the memcpy must be larger or equal to the size of the
+  // alloca.
+  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
+  if (!C1 || !TypeSize::isKnownGE(
+                 TypeSize::getFixed(C1->getValue().getZExtValue()), *ImmutSize))
+    return false;
+
+  // The address space of the memcpy source must match the argument.
+  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
+      ImmutArg->getType()->getPointerAddressSpace())
+    return false;
+
+  // The memcpy source must not be modified by the call itself; otherwise the
+  // callee could observe its own writes through the forwarded pointer.
+  MemoryLocation SrcLoc = MemoryLocation::getForSource(MDep);
+  if (isModSet(BAA.getModRefInfo(&CB, SrcLoc)))
+    return false;
+
+  // The memcpy source must not be written between the memcpy and the call.
+  if (writtenBetween(MSSA, BAA, SrcLoc, MSSA->getMemoryAccess(MDep),
+                     CallAccess))
+    return false;
+
+  // Get the alignment the call site specifies for the argument. If the call
+  // doesn't specify one, conservatively bail out.
+  MaybeAlign ImmutAlign = CB.getParamAlign(ArgNo);
+  if (!ImmutAlign)
+    return false;
+
+  // If it is greater than the memcpy source alignment, try to force the source
+  // to the alignment we need and bail out on failure. This is checked last so
+  // the source is only touched once every other condition holds.
+  MaybeAlign MemDepAlign = MDep->getSourceAlign();
+  if ((!MemDepAlign || *MemDepAlign < *ImmutAlign) &&
+      getOrEnforceKnownAlignment(MDep->getSource(), ImmutAlign, DL, &CB, AC,
+                                 DT) < *ImmutAlign)
+    return false;
+
+  Value *TmpCast = MDep->getSource();
+  if (MDep->getSource()->getType() != ImmutArg->getType()) {
+    BitCastInst *TmpBitCast =
+        new BitCastInst(MDep->getSource(), ImmutArg->getType(), "tmpcast", &CB);
+    // Set the tmpcast's DebugLoc to MDep's
+    TmpBitCast->setDebugLoc(MDep->getDebugLoc());
+    TmpCast = TmpBitCast;
+  }
+
+  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to noalias readonly:\n"
+                    << " " << *MDep << "\n"
+                    << " " << CB << "\n");
+
+  // Otherwise we're good! Update the call argument.
+  CB.setArgOperand(ArgNo, TmpCast);
+  ++NumMemCpyInstr;
+  return true;
+}
+
 /// Executes one iteration of MemCpyOptPass.
 bool MemCpyOptPass::iterateOnFunction(Function &F) {
   bool MadeChange = false;
@@ -1641,9 +1741,16 @@
       else if (auto *M = dyn_cast<MemMoveInst>(I))
         RepeatInstruction = processMemMove(M);
       else if (auto *CB = dyn_cast<CallBase>(I)) {
-        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
+        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
           if (CB->isByValArgument(i))
             MadeChange |= processByValArgument(*CB, i);
+          else if (CB->onlyReadsMemory(i)) {
+            if (!(CB->paramHasAttr(i, Attribute::NoAlias) &&
+                  CB->paramHasAttr(i, Attribute::NoCapture)))
+              continue;
+            MadeChange |= processImmutArgument(*CB, i);
+          }
+        }
       }
 
       // Reprocess the instruction if desired.
Index: llvm/test/Transforms/MemCpyOpt/memcpy.ll
===================================================================
--- llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -403,9 +403,7 @@
 define void @immut_param1(ptr align 4 noalias %val) {
 ; CHECK-LABEL: @immut_param1(
 ; CHECK-NEXT: start:
-; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false)
-; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL1]])
+; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL:%.*]])
 ; CHECK-NEXT: ret void
 ;
 start:
@@ -419,9 +417,7 @@
 define void @immut_param2_readonly(ptr align 4 noalias %val) {
 ; CHECK-LABEL: @immut_param2_readonly(
 ; CHECK-NEXT: start:
-; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false)
-; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL1]])
+; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL:%.*]])
 ; CHECK-NEXT: ret void
 ;
 start:
@@ -435,8 +431,7 @@
 define void @immut_param2_readnone(ptr align 4 noalias %val) {
 ; CHECK-LABEL: @immut_param2_readnone(
 ; CHECK-NEXT: start:
-; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4
-; CHECK-NEXT: call void @f_full_readnone(ptr align 4 [[VAL1]])
+; CHECK-NEXT: call void @f_full_readnone(ptr align 4 [[VAL:%.*]])
 ; CHECK-NEXT: ret void
 ;
 start: