diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1341,18 +1341,32 @@ // If the store is a memcpy instruction, we must check if it will write to // the load memory locations. So remove it from the ignored stores. - if (IsMemCpy) + if (IsMemCpy) { IgnoredInsts.erase(TheStore); + UseMemMove = false; + } if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, StoreSizeSCEV, *AA, IgnoredInsts)) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad) - << ore::NV("Inst", InstRemark) << " in " - << ore::NV("Function", TheStore->getFunction()) - << " function will not be hoisted: " - << ore::NV("Reason", "The loop may access load location"); - }); - return Changed; + // If this is a memcpy instruction, and the only instruction in the loop + // that may write to the load location is the memcpy itself, we can + // transform it to memmove, since memmove handles overlapping copies. + if (IsMemCpy) { + IgnoredInsts.insert(TheStore); + UseMemMove = + !mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, + StoreSizeSCEV, *AA, IgnoredInsts); + } + if (!IsMemCpy || !UseMemMove) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", + TheLoad) + << ore::NV("Inst", InstRemark) << " in " + << ore::NV("Function", TheStore->getFunction()) + << " function will not be hoisted: " + << ore::NV("Reason", "The loop may access load location"); + }); + return Changed; + } } if (UseMemMove) { // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for @@ -1363,7 +1377,8 @@ const Value *BP2 = llvm::GetPointerBaseWithConstantOffset( StoreBasePtr->stripPointerCasts(), StoreOff, *DL); int64_t LoadSize = - DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8; + IsMemCpy ? 
StoreSize + : DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8; if (BP1 != BP2 || LoadSize != int64_t(StoreSize)) return Changed; if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) || diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll --- a/llvm/test/Transforms/LoopIdiom/basic.ll +++ b/llvm/test/Transforms/LoopIdiom/basic.ll @@ -1266,17 +1266,54 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) -;; FIXME: Do not form memmove from loop body containing memcpy. +define void @memcpy_to_memmove(i8* %Src, i64 %Size) { +; CHECK-LABEL: @memcpy_to_memmove( +; CHECK-NEXT: bb.nph: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 1 +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 [[SRC]], i8* align 1 [[SCEVGEP]], i64 [[SIZE:%.*]], i1 false) +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1 +; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]] +; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +bb.nph: + br label %for.body + +for.body: ; preds = %bb.nph, %for.body + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] + %Step = add nuw nsw i64 %indvar, 1 + %SrcI = getelementptr i8, i8* %Src, i64 %Step + %DestI = getelementptr i8, i8* %Src, i64 %indvar + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false) + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %Size + br i1 
%exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +;; Do not form memmove when loop writes to load location define void @do_not_form_memmove4(i8* %Src, i64 %Size) { ; CHECK-LABEL: @do_not_form_memmove4( ; CHECK-NEXT: bb.nph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[BB_NPH:%.*]] ] ; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1 -; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[STEP]] +; CHECK-NEXT: [[SRCI:%.*]] = getelementptr i8, i8* [[SRC:%.*]], i64 [[STEP]] ; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i8, i8* [[SRC]], i64 [[INDVAR]] ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[DESTI]], i8* align 1 [[SRCI]], i64 1, i1 false) +; CHECK-NEXT: [[MOD:%.*]] = load i8, i8* [[SRCI]], align 1 +; CHECK-NEXT: [[INC:%.*]] = add i8 [[MOD]], 1 +; CHECK-NEXT: store i8 [[INC]], i8* [[SRCI]], align 1 ; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -1287,16 +1324,19 @@ br label %for.body for.body: ; preds = %bb.nph, %for.body - %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] + %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %bb.nph ] %Step = add nuw nsw i64 %indvar, 1 %SrcI = getelementptr i8, i8* %Src, i64 %Step %DestI = getelementptr i8, i8* %Src, i64 %indvar call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %DestI, i8* align 1 %SrcI, i64 1, i1 false) + %Mod = load i8, i8* %SrcI, align 1 + %inc = add i8 %Mod, 1 + store i8 %inc, i8* %SrcI, align 1 %indvar.next = add i64 %indvar, 1 %exitcond = icmp eq i64 %indvar.next, %Size br i1 %exitcond, label %for.end, label %for.body -for.end: ; preds = %for.body, %entry 
+for.end: ; preds = %for.body ret void } diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll --- a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll +++ b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll @@ -430,5 +430,47 @@ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit } +;; A memmove-like loop with memcpys in loop body +define void @to_memmove(%struct.S* %Src, i64 %Size) { +; CHECK-LABEL: @to_memmove( +; CHECK-NEXT: bb.nph: +; CHECK-NEXT: [[SRC1:%.*]] = bitcast %struct.S* [[SRC:%.*]] to i8* +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[SRC]], i64 1 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast %struct.S* [[SCEVGEP]] to i8* +; CHECK-NEXT: [[TMP0:%.*]] = mul nuw i64 [[SIZE:%.*]], 12 +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 [[SRC1]], i8* align 4 [[SCEVGEP2]], i64 [[TMP0]], i1 false) +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1 +; CHECK-NEXT: [[SRCI:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[SRC]], i64 [[STEP]] +; CHECK-NEXT: [[DESTI:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[SRC]], i64 [[INDVAR]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.S* [[DESTI]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[SRCI]] to i8* +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +bb.nph: + br label %for.body + +for.body: ; preds = %bb.nph, %for.body + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] + %Step = add nuw nsw i64 %indvar, 1 + %SrcI = getelementptr %struct.S, %struct.S* %Src, i64 %Step + %DestI = getelementptr %struct.S, %struct.S* 
%Src, i64 %indvar + %0 = bitcast %struct.S* %DestI to i8* + %1 = bitcast %struct.S* %SrcI to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 12, i1 false) + %indvar.next = add i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %Size + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + ; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1