Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -231,10 +231,26 @@
     }
     }
   }
-  if (auto *CB = dyn_cast<CallBase>(Inst))
-    // All the supported TLI functions so far happen to have dest as their
-    // first argument.
-    return MemoryLocation::getAfter(CB->getArgOperand(0));
+
+  if (auto *CB = dyn_cast<CallBase>(Inst)) {
+    LibFunc LF;
+    if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
+      switch (LF) {
+      case LibFunc_strncpy:
+        if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
+          return MemoryLocation(CB->getArgOperand(0),
+                                LocationSize::precise(Len->getZExtValue()),
+                                CB->getAAMetadata());
+        [[clang::fallthrough]];
+
+      default:
+        // All the supported TLI functions so far happen to have dest as their
+        // first argument.
+        return MemoryLocation::getAfter(CB->getArgOperand(0));
+      }
+    }
+  }
+
   return MemoryLocation();
 }

@@ -1114,8 +1130,13 @@
       LibFunc LF;
       if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
         switch (LF) {
-        case LibFunc_strcpy:
         case LibFunc_strncpy:
+          if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
+            return MemoryLocation(CB->getArgOperand(0),
+                                  LocationSize::precise(Len->getZExtValue()),
+                                  CB->getAAMetadata());
+          [[clang::fallthrough]];
+        case LibFunc_strcpy:
         case LibFunc_strcat:
         case LibFunc_strncat:
           return {MemoryLocation::getAfter(CB->getArgOperand(0))};
@@ -1416,8 +1437,16 @@
       // clobber, bail out, as the path is not profitable. We skip this check
       // for intrinsic calls, because the code knows how to handle memcpy
       // intrinsics.
-      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
-        return None;
+      if (!isa<IntrinsicInst>(CurrentI) &&
+          isReadClobber(KillingLoc, CurrentI)) {
+        if (const auto *CB = dyn_cast<CallBase>(CurrentI)) {
+          LibFunc LF;
+          if (!(TLI.getLibFunc(*CB, LF) && TLI.has(LF) &&
+                LF == LibFunc_strncpy))
+            return None;
+        } else
+          return None;
+      }
 
       // Quick check if there are direct uses that are read-clobbers.
       if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
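Context note (not part of the patch): the DeadStoreElimination.cpp change above models a strncpy call whose length argument is a constant as a precise write of exactly that many bytes to the destination, instead of an unknown-size write starting at the destination, and stops the MemorySSA walk from bailing out when the read clobber it hits is such a strncpy. A minimal C sketch of the source-level pattern this lets DSE clean up follows; the function name is illustrative only and the constant 100 mirrors the IR tests below.

#include <string.h>

/* Sketch only: strncpy writes exactly 100 bytes to out (the copied data
 * plus NUL padding up to the length), and the memset then rewrites all
 * 100 of those bytes, so the strncpy is a dead store (see test1 below). */
void init_buffer(char *out, const char *in) {
  strncpy(out, in, 100); /* candidate dead store */
  memset(out, 42, 100);  /* full overwrite of the same 100 bytes */
}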
Index: llvm/test/Transforms/DeadStoreElimination/memset-and-strncpy.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/DeadStoreElimination/memset-and-strncpy.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -O2 -S | FileCheck %s
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare i8* @strncpy(i8* noalias nocapture, i8* noalias nocapture readonly, i64)
+
+; Test that strncpy/memset overwriting each other is optimized out
+
+; strncpy -> memset, full overwrite
+define void @test1(i8* noalias %out, i8* noalias %in) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(100) [[OUT:%.*]], i8 42, i64 100, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %call = tail call i8* @strncpy(i8* %out, i8* %in, i64 100)
+  tail call void @llvm.memset.p0i8.i64(i8* %out, i8 42, i64 100, i1 false)
+  ret void
+}
+
+; strncpy -> memset, partial overwrite
+define void @test2(i8* noalias %out, i8* noalias %in) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[OUT:%.*]], i8* noundef nonnull dereferenceable(1) [[IN:%.*]], i64 100)
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(99) [[OUT]], i8 42, i64 99, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %call = tail call i8* @strncpy(i8* %out, i8* %in, i64 100)
+  tail call void @llvm.memset.p0i8.i64(i8* %out, i8 42, i64 99, i1 false)
+  ret void
+}
+
+; strncpy -> memset, different destination
+define void @test3(i8* noalias %out1, i8* noalias %out2, i8* noalias %in) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[OUT1:%.*]], i8* noundef nonnull dereferenceable(1) [[IN:%.*]], i64 100)
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(100) [[OUT2:%.*]], i8 42, i64 100, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %call = tail call i8* @strncpy(i8* %out1, i8* %in, i64 100)
+  tail call void @llvm.memset.p0i8.i64(i8* %out2, i8 42, i64 100, i1 false)
+  ret void
+}
+
+
+; memset -> strncpy, full overwrite
+define void @test4(i8* noalias %out, i8* noalias %in) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[OUT:%.*]], i8* noundef nonnull dereferenceable(1) [[IN:%.*]], i64 100)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %out, i8 42, i64 100, i1 false)
+  %call = tail call i8* @strncpy(i8* %out, i8* %in, i64 100)
+  ret void
+}
+
+; memset -> strncpy, partial overwrite
+define void @test5(i8* noalias %out, i8* noalias %in) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[OUT:%.*]], i64 99
+; CHECK-NEXT:    store i8 42, i8* [[TMP1]], align 1
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[OUT]], i8* noundef nonnull dereferenceable(1) [[IN:%.*]], i64 99)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %out, i8 42, i64 100, i1 false)
+  %call = tail call i8* @strncpy(i8* %out, i8* %in, i64 99)
+  ret void
+}
+
+; memset -> strncpy, different destination
+define void @test6(i8* noalias %out1, i8* noalias %out2, i8* noalias %in) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(100) [[OUT1:%.*]], i8 42, i64 100, i1 false)
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[OUT2:%.*]], i8* noundef nonnull dereferenceable(1) [[IN:%.*]], i64 100)
+; CHECK-NEXT:    ret void
+;
+  tail call void @llvm.memset.p0i8.i64(i8* %out1, i8 42, i64 100, i1 false)
+  %call = tail call i8* @strncpy(i8* %out2, i8* %in, i64 100)
+  ret void
+}
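Context note (not part of the patch): a hedged C-level sketch of the reverse direction exercised by test4 and test5 above, a memset killed fully or partially by a later constant-length strncpy; the function names are illustrative and the constants mirror the IR tests.

#include <string.h>

/* test4 shape: strncpy writes all 100 bytes of out (data plus NUL
 * padding), so the earlier memset of the same 100 bytes is dead. */
void overwrite_full(char *out, const char *in) {
  memset(out, 42, 100); /* dead: fully overwritten below */
  strncpy(out, in, 100);
}

/* test5 shape: strncpy covers only the first 99 bytes, so one byte of
 * the memset stays live; in the IR test DSE shrinks the memset to a
 * single byte store at offset 99. */
void overwrite_partial(char *out, const char *in) {
  memset(out, 42, 100); /* only the last byte remains live */
  strncpy(out, in, 99);
}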