diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h --- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -161,7 +161,6 @@ Value *optimizeStrNDup(CallInst *CI, IRBuilderBase &B); Value *optimizeStrCpy(CallInst *CI, IRBuilderBase &B); Value *optimizeStpCpy(CallInst *CI, IRBuilderBase &B); - Value *optimizeStrNCpy(CallInst *CI, IRBuilderBase &B); Value *optimizeStrLen(CallInst *CI, IRBuilderBase &B); Value *optimizeStrNLen(CallInst *CI, IRBuilderBase &B); Value *optimizeStrPBrk(CallInst *CI, IRBuilderBase &B); @@ -182,6 +181,9 @@ Value *optimizeRealloc(CallInst *CI, IRBuilderBase &B); Value *optimizeWcslen(CallInst *CI, IRBuilderBase &B); Value *optimizeBCopy(CallInst *CI, IRBuilderBase &B); + + // Helper to optimize stpncpy and strncpy. + Value *optimizeStringNCpy(CallInst *CI, bool RetEnd, IRBuilderBase &B); // Wrapper for all String/Memory Library Call Optimizations Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilderBase &B); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -688,7 +688,10 @@ return DstEnd; } -Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) { +// Optimize a call CI to either stpncpy when RetEnd is true, or to strncpy +// otherwise. 
+Value *LibCallSimplifier::optimizeStringNCpy(CallInst *CI, bool RetEnd, + IRBuilderBase &B) { Function *Callee = CI->getCalledFunction(); Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); @@ -697,27 +700,40 @@ if (isKnownNonZero(Size, DL)) annotateNonNullNoUndefBasedOnAccess(CI, 1); - uint64_t Len; - if (ConstantInt *LengthArg = dyn_cast(Size)) - Len = LengthArg->getZExtValue(); - else - return nullptr; - - // strncpy(x, y, 0) -> x - if (Len == 0) - return Dst; - // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); if (SrcLen) { annotateDereferenceableBytes(CI, 1, SrcLen); --SrcLen; // Unbias length. } else { - return nullptr; + auto *CstSize = dyn_cast(Size); + if (!CstSize) + return nullptr; + + // Handle copies of up to one byte from nonconstant strings. + if (CstSize->isZeroValue()) + return Dst; + + if (!CstSize->isOneValue()) + return nullptr; + + Type *CharTy = B.getInt8Ty(); + Value *CharVal = B.CreateLoad(CharTy, Src, "stxncpy.char0"); + B.CreateStore(CharVal, Dst); + if (!RetEnd) + return Dst; + + // Transform stpncpy(d, s, 1) to return (*d = *s) ? d + 1 : d. + Value *ZeroChar = ConstantInt::get(CharTy, 0); + Value *Cmp = B.CreateICmpEQ(CharVal, ZeroChar, "stpncpy.char0cmp"); + + Value *Off1 = B.getInt32(1); + Value *EndPtr = B.CreateInBoundsGEP(CharTy, Dst, Off1, "stpncpy.end"); + return B.CreateSelect(Cmp, Dst, EndPtr, "stpncpy.sel"); } if (SrcLen == 0) { - // strncpy(x, "", y) -> memset(x, '\0', y) + // st{p,r}ncpy(x, "", N) -> memset(x, '\0', N) for any N. 
Align MemSetAlign = CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne(); CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign); @@ -728,14 +744,26 @@ return Dst; } - // strncpy(a, "a", 4) - > memcpy(a, "a\0\0\0", 4) - if (Len > SrcLen + 1) { - if (Len <= 128) { + uint64_t N; + if (ConstantInt *SizeC = dyn_cast(Size)) + N = SizeC->getZExtValue(); + else + return nullptr; + + // st{p,r}ncpy(x, y, 0) -> x + if (N == 0) + return Dst; + + if (N > SrcLen + 1) { + // st{p,r}ncpy(a, "a", 4) -> memcpy(a, "a\0\0\0", 4) + if (N <= 128) { StringRef Str; if (!getConstantStringInfo(Src, Str)) return nullptr; std::string SrcStr = Str.str(); - SrcStr.resize(Len, '\0'); + // Create a bigger, nul-padded array with the same length, SrcLen, + // as the original string. + SrcStr.resize(N, '\0'); Src = B.CreateGlobalString(SrcStr, "str"); } else { return nullptr; @@ -743,13 +771,18 @@ } Type *PT = Callee->getFunctionType()->getParamType(0); - // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant] + // st{p,r}ncpy(x, s, N) -> memcpy(align 1 x, align 1 s, N) when both + // s and N are constant. 
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), - ConstantInt::get(DL.getIntPtrType(PT), Len)); + ConstantInt::get(DL.getIntPtrType(PT), N)); NewCI->setAttributes(CI->getAttributes()); NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); copyFlags(*CI, NewCI); - return Dst; + if (!RetEnd) + return Dst; + + Value *Off = B.getInt64(SrcLen); + return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, Off, "endptr"); } Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilderBase &B, @@ -3194,8 +3227,10 @@ return optimizeStrCpy(CI, Builder); case LibFunc_stpcpy: return optimizeStpCpy(CI, Builder); + case LibFunc_stpncpy: + return optimizeStringNCpy(CI, /*RetEnd=*/true, Builder); case LibFunc_strncpy: - return optimizeStrNCpy(CI, Builder); + return optimizeStringNCpy(CI, /*RetEnd=*/false, Builder); case LibFunc_strlen: return optimizeStrLen(CI, Builder); case LibFunc_strnlen: diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll --- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll +++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll @@ -289,7 +289,7 @@ declare i8* @__stpncpy_chk(i8* noundef, i8* noundef, i32 noundef, i32 noundef) define signext i32 @emit_stpncpy() { ; CHECK-LABEL: @emit_stpncpy( -; CHECK-NEXT: [[STPNCPY:%.*]] = call i8* @stpncpy(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2) +; CHECK-NEXT: [[STPNCPY:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @a, i32 0, i32 0), i32 2) ; CHECK-NEXT: ret i32 0 ; %call = call i8* @__stpncpy_chk(i8* noundef getelementptr inbounds ([5 x i8], [5 x i8]* @b, i32 0, i32 0), diff --git a/llvm/test/Transforms/InstCombine/stpncpy-1.ll 
b/llvm/test/Transforms/InstCombine/stpncpy-1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/stpncpy-1.ll @@ -0,0 +1,247 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; Test that the stpncpy library call simplifier works correctly. +; +; RUN: opt < %s -data-layout="E" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,BE +; RUN: opt < %s -data-layout="e" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,LE + +declare i8* @stpncpy(i8*, i8*, i64) + +@s4 = constant [5 x i8] c"1234\00" + +; The following are generated by the stpncpy -> memcpy transformation +; (trading space for speed). +@str = private constant [4 x i8] c"4\00\00\00" +@str.1 = private constant [10 x i8] c"4\00\00\00\00\00\00\00\00\00" +@str.2 = private constant [10 x i8] c"1234\00\00\00\00\00\00" + +; Verify that stpncpy(D, "", N) calls are transformed to a nul store +; to *D for nonzero N and folded to D for all values of N. + +define void @fold_stpncpy_s0(i8* %dst, i64 %n, i8** %pend) { +; ANY-LABEL: @fold_stpncpy_s0( +; ANY-NEXT: store i8* [[DST:%.*]], i8** [[PEND:%.*]], align 8 +; ANY-NEXT: store i8 0, i8* [[DST]], align 1 +; ANY-NEXT: store i8* [[DST]], i8** [[PEND]], align 8 +; ANY-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16* +; ANY-NEXT: store i16 0, i16* [[TMP1]], align 1 +; ANY-NEXT: store i8* [[DST]], i8** [[PEND]], align 8 +; ANY-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8 0, i64 9, i1 false) +; ANY-NEXT: store i8* [[DST]], i8** [[PEND]], align 8 +; ANY-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 [[DST]], i8 0, i64 [[N:%.*]], i1 false) +; ANY-NEXT: store i8* [[DST]], i8** [[PEND]], align 8 +; ANY-NEXT: ret void +; + %ps0 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 4 + +; Fold stpncpy(D, "", 0) to just D. 
+ %es0_0 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 0) + store i8* %es0_0, i8** %pend + +; Transform stpncpy(D, "", 1) to *D = '\0', D. + %es0_1 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 1) + store i8* %es0_1, i8** %pend + +; Transform stpncpy(D, "", 2) to memset(D, 0, 2), D. + %es0_2 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 2) + store i8* %es0_2, i8** %pend + +; Transform stpncpy(D, "", 9) to memset(D, 0, 9), D. + %es0_9 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 9) + store i8* %es0_9, i8** %pend + +; Transform stpncpy(D, "", n) to memset(D, 0, n), D. + %es0_n = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 %n) + store i8* %es0_n, i8** %pend + + ret void +} + + +; Verify that stpncpy(D, "4", N) calls are transformed to the equivalent +; of strncpy and the result either folded to D if N == 0 or to D + 1 otherwise. + +define void @fold_stpncpy_s1(i8* %dst, i64 %n, i8** %pend) { +; BE-LABEL: @fold_stpncpy_s1( +; BE-NEXT: store i8* [[DST:%.*]], i8** [[PEND:%.*]], align 8 +; BE-NEXT: store i8 52, i8* [[DST]], align 1 +; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; BE-NEXT: store i8* [[ENDPTR]], i8** [[PEND]], align 8 +; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16* +; BE-NEXT: store i16 13312, i16* [[TMP1]], align 1 +; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; BE-NEXT: store i8* [[ENDPTR1]], i8** [[PEND]], align 8 +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @str.3, i64 0, i64 0), i64 3, i1 false) +; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; BE-NEXT: store i8* [[ENDPTR2]], i8** [[PEND]], align 8 +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.4, i64 0, i64 0), i64 9, 
i1 false) +; BE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; BE-NEXT: store i8* [[ENDPTR3]], i8** [[PEND]], align 8 +; BE-NEXT: ret void +; +; LE-LABEL: @fold_stpncpy_s1( +; LE-NEXT: store i8* [[DST:%.*]], i8** [[PEND:%.*]], align 8 +; LE-NEXT: store i8 52, i8* [[DST]], align 1 +; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; LE-NEXT: store i8* [[ENDPTR]], i8** [[PEND]], align 8 +; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16* +; LE-NEXT: store i16 52, i16* [[TMP1]], align 1 +; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; LE-NEXT: store i8* [[ENDPTR1]], i8** [[PEND]], align 8 +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @str.3, i64 0, i64 0), i64 3, i1 false) +; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; LE-NEXT: store i8* [[ENDPTR2]], i8** [[PEND]], align 8 +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.4, i64 0, i64 0), i64 9, i1 false) +; LE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1 +; LE-NEXT: store i8* [[ENDPTR3]], i8** [[PEND]], align 8 +; LE-NEXT: ret void +; + %ps1 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 3 + +; Fold stpncpy(D, "4", 0) to just D. + %es1_0 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 0) + store i8* %es1_0, i8** %pend + +; Transform stpncpy(D, "4", 1) to *D = '4', D + 1. + %es1_1 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 1) + store i8* %es1_1, i8** %pend + +; Transform stpncpy(D, "4", 2) to strncpy(D, "4", 2) + 1. 
+ %es1_2 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 2) + store i8* %es1_2, i8** %pend + +; Transform stpncpy(D, "4", 3) to strncpy(D, "4", 3) + 1, which is then +; transformed to memcpy(D, "4", 2), D + 1. + %es1_3 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 3) + store i8* %es1_3, i8** %pend + +; Transform stpncpy(D, "4", 9) to strncpy(D, "4", 9) + 1. + %es1_9 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 9) + store i8* %es1_9, i8** %pend + + ret void +} + +; Verify that stpncpy(D, "1234", N) calls are transformed to the equivalent +; of strncpy and the result either folded to D if N == 0 and D + 1 otherwise. + +define void @fold_stpncpy_s4(i8* %dst, i64 %n, i8** %pend) { +; BE-LABEL: @fold_stpncpy_s4( +; BE-NEXT: store i8* [[DST:%.*]], i8** [[PEND:%.*]], align 8 +; BE-NEXT: store i8 49, i8* [[DST]], align 1 +; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; BE-NEXT: store i8* [[ENDPTR]], i8** [[PEND]], align 8 +; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16* +; BE-NEXT: store i16 12594, i16* [[TMP1]], align 1 +; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; BE-NEXT: store i8* [[ENDPTR1]], i8** [[PEND]], align 8 +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false) +; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; BE-NEXT: store i8* [[ENDPTR2]], i8** [[PEND]], align 8 +; BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32* +; BE-NEXT: store i32 825373492, i32* [[TMP2]], align 1 +; BE-NEXT: store i8* [[ENDPTR2]], i8** [[PEND]], align 8 +; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.5, i64 0, i64 0), i64 9, i1 false) +; BE-NEXT: [[ENDPTR4:%.*]] = 
getelementptr inbounds i8, i8* [[DST]], i64 4 +; BE-NEXT: store i8* [[ENDPTR4]], i8** [[PEND]], align 8 +; BE-NEXT: ret void +; +; LE-LABEL: @fold_stpncpy_s4( +; LE-NEXT: store i8* [[DST:%.*]], i8** [[PEND:%.*]], align 8 +; LE-NEXT: store i8 49, i8* [[DST]], align 1 +; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; LE-NEXT: store i8* [[ENDPTR]], i8** [[PEND]], align 8 +; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16* +; LE-NEXT: store i16 12849, i16* [[TMP1]], align 1 +; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; LE-NEXT: store i8* [[ENDPTR1]], i8** [[PEND]], align 8 +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false) +; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; LE-NEXT: store i8* [[ENDPTR2]], i8** [[PEND]], align 8 +; LE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32* +; LE-NEXT: store i32 875770417, i32* [[TMP2]], align 1 +; LE-NEXT: store i8* [[ENDPTR2]], i8** [[PEND]], align 8 +; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.5, i64 0, i64 0), i64 9, i1 false) +; LE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; LE-NEXT: store i8* [[ENDPTR4]], i8** [[PEND]], align 8 +; LE-NEXT: ret void +; + %ps4 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 0 + +; Fold stpncpy(D, "1234", 0) to just D. + %es4_0 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 0) + store i8* %es4_0, i8** %pend + +; Transform stpncpy(D, "1234", 1) to *D = '1', D + 1. + %es4_1 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 1) + store i8* %es4_1, i8** %pend + +; Transform stpncpy(D, "1234", 2) to strncpy(D, "1234", 2) + 2. 
+ %es4_2 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 2) + store i8* %es4_2, i8** %pend + +; Transform stpncpy(D, "1234", 3) to strncpy(D, "1234", 3) + 3. + %es4_3 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 3) + store i8* %es4_3, i8** %pend + +; Transform stpncpy(D, "1234", 4) to strncpy(D, "1234", 4) + 4. + %es4_4 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 4) + store i8* %es4_3, i8** %pend + +; Transform stpncpy(D, "1234", 9) to strncpy(D, "1234", 9) + 4. + %es4_9 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 9) + store i8* %es4_9, i8** %pend + + ret void +} + + +; Verify that stpncpy(D, S, N) calls with N < 2 are transformed to +; the equivalent of strncpy and either folded to D if N == 0 or to +; *D ? D + 1 : D otherwise. + +define void @fold_stpncpy_s(i8* %dst, i8* %src, i8** %pend) { +; ANY-LABEL: @fold_stpncpy_s( +; ANY-NEXT: store i8* [[DST:%.*]], i8** [[PEND:%.*]], align 8 +; ANY-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[SRC:%.*]], align 1 +; ANY-NEXT: store i8 [[STXNCPY_CHAR0]], i8* [[DST]], align 1 +; ANY-NEXT: [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0 +; ANY-NEXT: [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64 +; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr i8, i8* [[DST]], i64 [[STPNCPY_SEL_IDX]] +; ANY-NEXT: store i8* [[STPNCPY_SEL]], i8** [[PEND]], align 8 +; ANY-NEXT: ret void +; +; Fold stpncpy(D, S, 0) to just D. + %es_0 = call i8* @stpncpy(i8* %dst, i8* %src, i64 0) + store i8* %es_0, i8** %pend + +; Transform stpncpy(D, S, 1) to *D = *S, *D ? D + 1 : D. + %es_1 = call i8* @stpncpy(i8* %dst, i8* %src, i64 1) + store i8* %es_1, i8** %pend + + ret void +} + + +; Verify that stpncpy(D, S, N) calls with N >= 2 are not transformed. +; In theory they could be transformed to the equivalent of the following +; though it's not clear that it would be a win: +; P = memccpy(D, S, 0, N) +; N' = P ? N - (P - D) : 0 +; Q = P ? 
P : D + N +; memset(Q, 0, N') +; Q + +define void @call_stpncpy_s(i8* %dst, i8* %src, i64 %n, i8** %pend) { +; ANY-LABEL: @call_stpncpy_s( +; ANY-NEXT: [[ES_N:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* [[SRC:%.*]], i64 [[N:%.*]]) +; ANY-NEXT: store i8* [[ES_N]], i8** [[PEND:%.*]], align 8 +; ANY-NEXT: ret void +; + +; Do not transform stpncpy(D, S, N). + %es_n = call i8* @stpncpy(i8* %dst, i8* %src, i64 %n) + store i8* %es_n, i8** %pend + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll b/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll --- a/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll +++ b/llvm/test/Transforms/InstCombine/strcpy-nonzero-as.ll @@ -60,12 +60,11 @@ ret void } -; Note: stpncpy is not handled by SimplifyLibcalls yet, so this should not be changed. define void @test_stpncpy_to_memcpy(i8 addrspace(200)* %dst) addrspace(200) nounwind { ; CHECK-LABEL: define {{[^@]+}}@test_stpncpy_to_memcpy ; CHECK-SAME: (i8 addrspace(200)* [[DST:%.*]]) addrspace(200) #[[ATTR1]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call addrspace(200) i8 addrspace(200)* @stpncpy(i8 addrspace(200)* [[DST]], i8 addrspace(200)* getelementptr inbounds ([17 x i8], [17 x i8] addrspace(200)* @str, i64 0, i64 0), i64 17) +; CHECK-NEXT: call addrspace(200) void @llvm.memcpy.p200i8.p200i8.i128(i8 addrspace(200)* noundef align 1 dereferenceable(17) [[DST]], i8 addrspace(200)* noundef align 1 dereferenceable(17) getelementptr inbounds ([17 x i8], [17 x i8] addrspace(200)* @str, i64 0, i64 0), i128 17, i1 false) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/InstCombine/strncpy-1.ll b/llvm/test/Transforms/InstCombine/strncpy-1.ll --- a/llvm/test/Transforms/InstCombine/strncpy-1.ll +++ b/llvm/test/Transforms/InstCombine/strncpy-1.ll @@ -107,7 +107,7 @@ define void @test_simplify7(i8* %dst, i32 %n) { ; CHECK-LABEL: @test_simplify7( -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @strncpy(i8* 
noundef nonnull dereferenceable(80) [[DST:%.*]], i8* getelementptr inbounds ([1 x i8], [1 x i8]* @null, i32 0, i32 0), i32 [[N:%.*]]) +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(80) [[DST:%.*]], i8 0, i32 [[N:%.*]], i1 false) ; CHECK-NEXT: ret void ; %src = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0 @@ -182,8 +182,8 @@ define i8* @test_no_simplify3(i8* %dst, i8* %src, i32 %count) { ; CHECK-LABEL: @test_no_simplify3( -; CHECK-NEXT: %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32) -; CHECK-NEXT: ret i8* %ret +; CHECK-NEXT: [[RET:%.*]] = musttail call i8* @strncpy(i8* [[DST:%.*]], i8* [[SRC:%.*]], i32 32) +; CHECK-NEXT: ret i8* [[RET]] ; %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 32) ret i8* %ret @@ -191,8 +191,8 @@ define i8* @test_no_simplify4(i8* %dst, i8* %src, i32 %count) { ; CHECK-LABEL: @test_no_simplify4( -; CHECK-NEXT: %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6) -; CHECK-NEXT: ret i8* %ret +; CHECK-NEXT: [[RET:%.*]] = musttail call i8* @strncpy(i8* [[DST:%.*]], i8* [[SRC:%.*]], i32 6) +; CHECK-NEXT: ret i8* [[RET]] ; %ret = musttail call i8* @strncpy(i8* %dst, i8* %src, i32 6) ret i8* %ret diff --git a/llvm/test/Transforms/InstCombine/strncpy-4.ll b/llvm/test/Transforms/InstCombine/strncpy-4.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/strncpy-4.ll @@ -0,0 +1,107 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; +; Test that strncpy(D, S, N) calls with the empty string S as a source +; are simplified for all values of N. +; +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +declare i8* @strncpy(i8*, i8*, i64) + +; A string of length 4 but size 9 to also verify that characters after +; the nul don't affect the transformation. 
+@s4 = constant [9 x i8] c"1234\00567\00" + + +; Verify that strncpy(D, "", N) calls are transformed to a nul store +; to *D for nonzero N and folded to D for all values of N. + +define void @fold_strncpy_s0(i8* %dst, i64 %n, i8** %pend) { +; CHECK-LABEL: @fold_strncpy_s0( +; CHECK-NEXT: store i8* [[DST:%.*]], i8** [[PEND:%.*]], align 8 +; CHECK-NEXT: store i8 0, i8* [[DST]], align 1 +; CHECK-NEXT: store i8* [[DST]], i8** [[PEND]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16* +; CHECK-NEXT: store i16 0, i16* [[TMP1]], align 1 +; CHECK-NEXT: store i8* [[DST]], i8** [[PEND]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8 0, i64 9, i1 false) +; CHECK-NEXT: store i8* [[DST]], i8** [[PEND]], align 8 +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 [[DST]], i8 0, i64 [[N:%.*]], i1 false) +; CHECK-NEXT: store i8* [[DST]], i8** [[PEND]], align 8 +; CHECK-NEXT: ret void +; + %ps0 = getelementptr [9 x i8], [9 x i8]* @s4, i32 0, i32 4 + +; Fold strncpy(D, "", 0) to just D. + %es0_0 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 0) + store i8* %es0_0, i8** %pend + +; Transform strncpy(D, "", 1) to *D = '\0', D. + %es0_1 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 1) + store i8* %es0_1, i8** %pend + +; Transform strncpy(D, "", 2) to memset(D, 0, 2), D. + %es0_2 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 2) + store i8* %es0_2, i8** %pend + +; Transform strncpy(D, "", 9) to memset(D, 0, 9), D. + %es0_9 = call i8* @strncpy(i8* %dst, i8* %ps0, i64 9) + store i8* %es0_9, i8** %pend + +; Transform strncpy(D, "", n) to memset(D, 0, n), D. + %es0_n = call i8* @strncpy(i8* %dst, i8* %ps0, i64 %n) + store i8* %es0_n, i8** %pend + + ret void +} + + +; Verify that strncpy(D, S, N) calls with nonconstant source S and constant +; size are simplified when N < 2. 
+ +define void @fold_strncpy_s(i8* %dst, i8* %src, i64 %n, i8** %pend) { +; CHECK-LABEL: @fold_strncpy_s( +; CHECK-NEXT: store i8* [[DST:%.*]], i8** [[PEND:%.*]], align 8 +; CHECK-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[SRC:%.*]], align 1 +; CHECK-NEXT: store i8 [[STXNCPY_CHAR0]], i8* [[DST]], align 1 +; CHECK-NEXT: store i8* [[DST]], i8** [[PEND]], align 8 +; CHECK-NEXT: ret void +; +; Fold strncpy(D, S, 0) to just D. + %es_0 = call i8* @strncpy(i8* %dst, i8* %src, i64 0) + store i8* %es_0, i8** %pend + +; Transform strncpy(D, S, 1) to *D = *S, D. + %es_1 = call i8* @strncpy(i8* %dst, i8* %src, i64 1) + store i8* %es_1, i8** %pend + + ret void +} + + +; Verify that strncpy(D, S, N) calls with nonconstant source S are not +; transformed when N is either unknown or greater than one. + +define void @call_strncpy_s(i8* %dst, i8* %src, i64 %n, i8** %pend) { +; CHECK-LABEL: @call_strncpy_s( +; CHECK-NEXT: [[ES_2:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[SRC:%.*]], i64 2) +; CHECK-NEXT: store i8* [[ES_2]], i8** [[PEND:%.*]], align 8 +; CHECK-NEXT: [[ES_9:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* noundef nonnull dereferenceable(1) [[SRC]], i64 9) +; CHECK-NEXT: store i8* [[ES_9]], i8** [[PEND]], align 8 +; CHECK-NEXT: [[ES_N:%.*]] = call i8* @strncpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* [[SRC]], i64 [[N:%.*]]) +; CHECK-NEXT: store i8* [[ES_N]], i8** [[PEND]], align 8 +; CHECK-NEXT: ret void +; +; Do not transform strncpy(D, S, 2). + %es_2 = call i8* @strncpy(i8* %dst, i8* %src, i64 2) + store i8* %es_2, i8** %pend + +; Do not transform strncpy(D, S, 9). + %es_9 = call i8* @strncpy(i8* %dst, i8* %src, i64 9) + store i8* %es_9, i8** %pend + +; Do not transform strncpy(D, S, N) when N is unknown. + %es_n = call i8* @strncpy(i8* %dst, i8* %src, i64 %n) + store i8* %es_n, i8** %pend + + ret void +}