Index: llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -73,6 +73,7 @@
   bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet,
                                   BatchAAResults &BAA);
   bool processByValArgument(CallBase &CB, unsigned ArgNo);
+  bool processImmutArgument(CallBase &CB, unsigned ArgNo);
   Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
                                     Value *ByteVal);
   bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -1613,6 +1613,117 @@
   return true;
 }
 
+/// Try to forward the source of a memcpy directly to the call \p CB when its
+/// argument \p ArgNo is a temporary alloca that the call only reads, i.e.
+/// turn `memcpy(tmp <- src); call(tmp)` into `call(src)`. This requires:
+///  1) The argument is `noalias` and `nocapture` at the call site.
+///  2) The argument is an alloca of known fixed size whose contents at the
+///     call are defined by a non-volatile memcpy of exactly that size.
+///  3) The memcpy source is a `noalias` function argument or an alloca, is
+///     (or can be made) at least as aligned as the alloca, and is in the
+///     same address space as the call argument.
+///  4) Neither the memcpy source nor its destination is written between the
+///     memcpy and the call.
+bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
+  // Ensure the passed argument is immutable during the call.
+  // FIXME: no write during the call could also be proven by a ModRef check.
+  if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
+        CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
+    return false;
+  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
+  Value *ImmutArg = CB.getArgOperand(ArgNo);
+
+  // The argument must be (a pointer cast of) an alloca.
+  // TODO: collect possible allocas like
+  //   %val1 = alloca [4 x i8], align 4
+  //   %val2 = alloca [16 x i8], align 4
+  //   %val3 = select i1 %c, ptr %val1, ptr %val2
+  AllocaInst *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
+  if (!AI)
+    return false;
+
+  // Can't handle an alloca whose size is unknown (e.g. a variable length
+  // array) or scalable: it is compared to the constant memcpy length below.
+  std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
+  if (!AllocaSize || AllocaSize->isScalable())
+    return false;
+  MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
+  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+  if (!CallAccess)
+    return false;
+
+  MemCpyInst *MDep = nullptr;
+  BatchAAResults BAA(*AA);
+  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+      CallAccess->getDefiningAccess(), Loc, BAA);
+  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+
+  // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
+  // a memcpy, that memcpy must be non-volatile and write to the argument.
+  if (!MDep || MDep->isVolatile() ||
+      ImmutArg->stripPointerCasts() != MDep->getDest())
+    return false;
+
+  // Only a `noalias` argument or an alloca is handled as the memcpy source,
+  // so that the intra-procedural clobber check below is sufficient.
+  // NOTE(review): a source alloca is not checked to be unescaped.
+  // TODO: more general patterns
+  Value *Src = MDep->getSource()->stripPointerCasts();
+  if (auto *Arg = dyn_cast<Argument>(Src);
+      !((Arg && Arg->hasNoAliasAttr()) || isa<AllocaInst>(Src)))
+    return false;
+
+  // The memcpy must have a constant length equal to the size of the alloca.
+  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+  if (!MDepLen || AllocaSize != MDepLen->getValue().getZExtValue())
+    return false;
+
+  // If the alloca is more aligned than the memcpy source is known to be, try
+  // to force the source to the alignment we need. If we fail, we bail out.
+  Align AllocaAlign = AI->getAlign();
+  MaybeAlign MemDepAlign = MDep->getSourceAlign();
+  if ((!MemDepAlign || *MemDepAlign < AllocaAlign) &&
+      getOrEnforceKnownAlignment(MDep->getSource(), MaybeAlign(AllocaAlign), DL,
+                                 &CB, AC, DT) < AllocaAlign)
+    return false;
+
+  // The address space of the memcpy source must match the immut argument
+  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
+      ImmutArg->getType()->getPointerAddressSpace())
+    return false;
+
+  // Verify that src and dest don't change between the memcpy and the call.
+  //    memcpy(a <- b)
+  //    *b = 42;
+  //    foo(a)
+  // It would be invalid to transform the call into foo(b).
+  MemoryUseOrDef *MemCpyAccess = MSSA->getMemoryAccess(MDep);
+  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
+                     MemCpyAccess, CallAccess) ||
+      writtenBetween(MSSA, BAA, MemoryLocation::getForDest(MDep),
+                     MemCpyAccess, CallAccess))
+    return false;
+
+  Value *TmpCast = MDep->getSource();
+  if (MDep->getSource()->getType() != ImmutArg->getType()) {
+    BitCastInst *TmpBitCast =
+        new BitCastInst(MDep->getSource(), ImmutArg->getType(), "tmpcast", &CB);
+    // Set the tmpcast's DebugLoc to MDep's
+    TmpBitCast->setDebugLoc(MDep->getDebugLoc());
+    TmpCast = TmpBitCast;
+  }
+
+  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
+                    << " " << *MDep << "\n"
+                    << " " << CB << "\n");
+
+  // Otherwise we're good! Update the immut argument.
+  CB.setArgOperand(ArgNo, TmpCast);
+  ++NumMemCpyInstr;
+  return true;
+}
+
 /// Executes one iteration of MemCpyOptPass.
bool MemCpyOptPass::iterateOnFunction(Function &F) { bool MadeChange = false; @@ -1641,9 +1752,14 @@ else if (auto *M = dyn_cast(I)) RepeatInstruction = processMemMove(M); else if (auto *CB = dyn_cast(I)) { - for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) + for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) { if (CB->isByValArgument(i)) MadeChange |= processByValArgument(*CB, i); + // TODO: this can be also decided by MemSSA analysis + else if (CB->onlyReadsMemory(i)) { + MadeChange |= processImmutArgument(*CB, i); + } + } } // Reprocess the instruction if desired. Index: llvm/test/Transforms/MemCpyOpt/memcpy.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/memcpy.ll +++ llvm/test/Transforms/MemCpyOpt/memcpy.ll @@ -397,13 +397,9 @@ declare void @f_full_readnone(ptr nocapture noalias readnone) declare void @f_full_memory_none(ptr nocapture noalias) memory(none) -; TODO: remove memcpy, which is guaranteed to be invariant -; before and after the call because of its attributes. 
define void @immut_param(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL1]]) +; CHECK-NEXT: call void @f(ptr noalias nocapture readonly align 4 [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -412,12 +408,9 @@ ret void } -; TODO: remove memcpy define void @immut_param_readonly(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_readonly( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL1]]) +; CHECK-NEXT: call void @f_full_readonly(ptr align 4 [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -426,11 +419,9 @@ ret void } -; TODO: remove memcpy define void @immut_param_readnone(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_readnone( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @f_full_readnone(ptr align 4 [[VAL1]]) +; CHECK-NEXT: call void @f_full_readnone(ptr align 4 [[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -439,7 +430,6 @@ ret void } -; TODO: remove memcpy define void @immut_param_memory_none(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_memory_none( ; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 @@ -452,12 +442,9 @@ ret void } -; TODO: remove memcpy define void @immut_param_no_align(ptr align 4 noalias %val) { ; CHECK-LABEL: @immut_param_no_align( -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL:%.*]], i64 1, i1 false) -; CHECK-NEXT: call void @f(ptr noalias nocapture readonly [[VAL1]]) +; CHECK-NEXT: call void @f(ptr noalias nocapture readonly 
[[VAL:%.*]]) ; CHECK-NEXT: ret void ; %val1 = alloca i8, align 4 @@ -534,14 +521,11 @@ ret void } -; TODO: remove memcpy define void @immut_unescaped_alloca(i1 %c) { ; CHECK-LABEL: @immut_unescaped_alloca( ; CHECK-NEXT: [[VAL:%.*]] = alloca i8, align 4 ; CHECK-NEXT: store i32 42, ptr [[VAL]], align 4 -; CHECK-NEXT: [[VAL1:%.*]] = alloca i8, align 4 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAL1]], ptr align 4 [[VAL]], i64 1, i1 false) -; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL1]]) +; CHECK-NEXT: call void @f_full_readonly(ptr [[VAL]]) ; CHECK-NEXT: ret void ; %val = alloca i8, align 4