Index: include/llvm/Analysis/ValueTracking.h
===================================================================
--- include/llvm/Analysis/ValueTracking.h
+++ include/llvm/Analysis/ValueTracking.h
@@ -274,7 +274,14 @@
 
   /// If we can compute the length of the string pointed to by the specified
   /// pointer, return 'len+1'. If we can't, return 0.
-  uint64_t GetStringLength(const Value *V, unsigned CharSize = 8);
+  ///
+  /// \p TLI may be null. When it is given, the direct result of a calloc()
+  /// call is reported as an empty string (length 1) if that call's only use is
+  /// in its own basic block, i.e. while the zero-filled memory cannot have
+  /// been written. \p V should therefore be an operand of the instruction
+  /// being analyzed.
+  uint64_t GetStringLength(const Value *V, const TargetLibraryInfo *TLI,
+                           unsigned CharSize = 8);
 
   /// This method strips off any GEP address adjustments and pointer casts from
   /// the specified value, returning the original object being addressed. Note
Index: lib/Analysis/MemoryBuiltins.cpp
===================================================================
--- lib/Analysis/MemoryBuiltins.cpp
+++ lib/Analysis/MemoryBuiltins.cpp
@@ -589,7 +589,7 @@
 
   // Handle strdup-like functions separately.
   if (FnData->AllocTy == StrDupLike) {
-    APInt Size(IntTyBits, GetStringLength(CS.getArgument(0)));
+    APInt Size(IntTyBits, GetStringLength(CS.getArgument(0), TLI));
     if (!Size)
       return unknown();
 
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -3369,10 +3369,63 @@
   return NullIndex + 1;
 }
 
+/// Return true if \p Str is known to point at an empty string because it is
+/// the result of a calloc() call whose zero-filled memory cannot have been
+/// written to yet.
+///
+/// The call must have a constant, non-zero element count and element size.
+/// The contents are only zero until the first store through the returned
+/// pointer, so the call must also have a single use located in its own basic
+/// block: no other instruction can then have touched the memory before that
+/// user runs. (A sole user in another block could sit in a loop and observe
+/// its own earlier writes.)
+static bool isStringFromCalloc(const Value *Str, const TargetLibraryInfo *TLI) {
+  // Without library info calloc cannot be identified.
+  if (!TLI)
+    return false;
+
+  const CallInst *Calloc = dyn_cast<CallInst>(Str);
+  if (!Calloc)
+    return false;
+
+  const Function *InnerCallee = Calloc->getCalledFunction();
+  if (!InnerCallee)
+    return false;
+
+  LibFunc Func;
+  if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
+      Func != LibFunc_calloc)
+    return false;
+
+  const ConstantInt *N = dyn_cast<ConstantInt>(Calloc->getArgOperand(0));
+  const ConstantInt *Size = dyn_cast<ConstantInt>(Calloc->getArgOperand(1));
+  if (!N || !Size)
+    return false;
+
+  // A zero-sized allocation has no byte that may be read.
+  if (N->isNullValue() || Size->isNullValue())
+    return false;
+
+  // Only trust the zero fill while nothing can have stored to the memory.
+  if (!Calloc->hasOneUse())
+    return false;
+  const Instruction *OnlyUser = dyn_cast<Instruction>(Calloc->user_back());
+  return OnlyUser && OnlyUser->getParent() == Calloc->getParent();
+}
+
 /// If we can compute the length of the string pointed to by
 /// the specified pointer, return 'len+1'. If we can't, return 0.
-uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
-  if (!V->getType()->isPointerTy()) return 0;
+///
+/// The direct result of a calloc() call is reported as an empty string when
+/// isStringFromCalloc accepts it; a null \p TLI disables that.
+uint64_t llvm::GetStringLength(const Value *V, const TargetLibraryInfo *TLI,
+                               unsigned CharSize) {
+  if (!V->getType()->isPointerTy())
+    return 0;
+
+  // Untouched calloc'ed memory is all zeros, i.e. the empty string.
+  if (isStringFromCalloc(V, TLI))
+    return 1;
 
   SmallPtrSet<const PHINode*, 32> PHIs;
   uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -159,17 +159,22 @@
   Value *Dst = CI->getArgOperand(0);
   Value *Src = CI->getArgOperand(1);
 
+  // strcat(x, y) -> strcpy(x, y) if x is known to be the empty string. Dst is
+  // an operand of CI, as GetStringLength's calloc handling requires.
+  if (GetStringLength(Dst, TLI) == 1)
+    return emitStrCpy(Dst, Src, B, TLI);
+
   // See if we can get the length of the input string.
-  uint64_t Len = GetStringLength(Src);
-  if (Len == 0)
+  uint64_t SrcLen = GetStringLength(Src, TLI);
+  if (SrcLen == 0)
     return nullptr;
-  --Len; // Unbias length.
+  --SrcLen; // Unbias length.
 
   // Handle the simple, do-nothing case: strcat(x, "") -> x
-  if (Len == 0)
+  if (SrcLen == 0)
     return Dst;
 
-  return emitStrLenMemCpy(Src, Dst, Len, B);
+  return emitStrLenMemCpy(Src, Dst, SrcLen, B);
 }
 
 Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
@@ -196,16 +201,20 @@
   // Extract some information from the instruction.
   Value *Dst = CI->getArgOperand(0);
   Value *Src = CI->getArgOperand(1);
   uint64_t Len;
 
+  // Note: an empty destination does not make strncat(dst, src, n) foldable to
+  // strncpy(dst, src, n). strncpy always writes exactly n bytes (it zero-pads
+  // short sources), so it can overflow a buffer that strncat would not.
+
   // We don't do anything if length is not constant.
   if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
     Len = LengthArg->getZExtValue();
   else
     return nullptr;
 
   // See if we can get the length of the input string.
-  uint64_t SrcLen = GetStringLength(Src);
+  uint64_t SrcLen = GetStringLength(Src, TLI);
   if (SrcLen == 0)
     return nullptr;
   --SrcLen; // Unbias length.
@@ -234,7 +243,7 @@
   // of the input string and turn this into memchr.
   ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
   if (!CharC) {
-    uint64_t Len = GetStringLength(SrcStr);
+    uint64_t Len = GetStringLength(SrcStr, TLI);
     if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
       return nullptr;
 
@@ -313,8 +322,8 @@
     return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
 
   // strcmp(P, "x") -> memcmp(P, "x", 2)
-  uint64_t Len1 = GetStringLength(Str1P);
-  uint64_t Len2 = GetStringLength(Str2P);
+  uint64_t Len1 = GetStringLength(Str1P, TLI);
+  uint64_t Len2 = GetStringLength(Str2P, TLI);
   if (Len1 && Len2) {
     return emitMemCmp(Str1P, Str2P,
                       ConstantInt::get(DL.getIntPtrType(CI->getContext()),
@@ -370,7 +379,7 @@
     return Src;
 
   // See if we can get the length of the input string.
-  uint64_t Len = GetStringLength(Src);
+  uint64_t Len = GetStringLength(Src, TLI);
   if (Len == 0)
     return nullptr;
 
@@ -390,7 +399,7 @@
   }
 
   // See if we can get the length of the input string.
-  uint64_t Len = GetStringLength(Src);
+  uint64_t Len = GetStringLength(Src, TLI);
   if (Len == 0)
     return nullptr;
 
@@ -412,7 +421,7 @@
   Value *LenOp = CI->getArgOperand(2);
 
   // See if we can get the length of the input string.
-  uint64_t SrcLen = GetStringLength(Src);
+  uint64_t SrcLen = GetStringLength(Src, TLI);
   if (SrcLen == 0)
     return nullptr;
   --SrcLen;
@@ -448,7 +457,7 @@
   Value *Src = CI->getArgOperand(0);
 
   // Constant folding: strlen("xyz") -> 3
-  if (uint64_t Len = GetStringLength(Src, CharSize))
+  if (uint64_t Len = GetStringLength(Src, TLI, CharSize))
     return ConstantInt::get(CI->getType(), Len - 1);
 
   // If s is a constant pointer pointing to a string literal, we can fold
@@ -512,8 +521,11 @@
 
   // strlen(x?"foo":"bars") --> x ? 3 : 4
   if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
-    uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize);
-    uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize);
+    // The select operands are not operands of CI itself, so a calloc result
+    // among them may have been written to through the select. Pass no TLI to
+    // keep GetStringLength from treating such memory as still zeroed.
+    uint64_t LenTrue = GetStringLength(SI->getTrueValue(), nullptr, CharSize);
+    uint64_t LenFalse = GetStringLength(SI->getFalseValue(), nullptr, CharSize);
     if (LenTrue && LenFalse) {
       ORE.emit([&]() {
         return OptimizationRemark("instcombine", "simplify-libcalls", CI)
@@ -2142,7 +2154,7 @@
   }
 
   // fputs(s,F) --> fwrite(s,1,strlen(s),F)
-  uint64_t Len = GetStringLength(CI->getArgOperand(0));
+  uint64_t Len = GetStringLength(CI->getArgOperand(0), TLI);
   if (!Len)
     return nullptr;
 
@@ -2565,7 +2577,7 @@
   if (OnlyLowerUnknownSize)
     return false;
   if (isString) {
-    uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp));
+    uint64_t Len = GetStringLength(CI->getArgOperand(SizeOp), TLI);
     // If the length is 0 we don't know how long it is and so we can't
     // remove the check.
     if (Len == 0)
@@ -2637,7 +2649,7 @@
     return nullptr;
 
   // Maybe we can stil fold __st[rp]cpy_chk to __memcpy_chk.
-  uint64_t Len = GetStringLength(Src);
+  uint64_t Len = GetStringLength(Src, TLI);
   if (Len == 0)
     return nullptr;
 
Index: test/Transforms/InstCombine/zero-string.ll
===================================================================
--- test/Transforms/InstCombine/zero-string.ll
+++ test/Transforms/InstCombine/zero-string.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+declare i32 @strlen(i8* nocapture)
+declare noalias i8* @calloc(i32, i32)
+declare noalias i8* @malloc(i32)
+declare i8* @strcat(i8*, i8* nocapture readonly)
+declare i8* @strncat(i8*, i8* nocapture readonly, i32)
+
+define i32 @calloc_strlen() {
+; CHECK-LABEL: @calloc_strlen(
+; CHECK-NEXT:    ret i32 0
+;
+  %call = tail call noalias i8* @calloc(i32 10, i32 1)
+  %call1 = tail call i32 @strlen(i8* %call)
+  ret i32 %call1
+}
+
+define i32 @calloc_strlen_not_const_nmemb(i32 %n) {
+; CHECK-LABEL: @calloc_strlen_not_const_nmemb(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @calloc(i32 [[N:%.*]], i32 10)
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @strlen(i8* [[CALL]])
+; CHECK-NEXT:    ret i32 [[CALL1]]
+;
+  %call = tail call noalias i8* @calloc(i32 %n, i32 10)
+  %call1 = tail call i32 @strlen(i8* %call)
+  ret i32 %call1
+}
+
+define i32 @calloc_strlen_not_const_size(i32 %size) {
+; CHECK-LABEL: @calloc_strlen_not_const_size(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @calloc(i32 1, i32 [[SIZE:%.*]])
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @strlen(i8* [[CALL]])
+; CHECK-NEXT:    ret i32 [[CALL1]]
+;
+  %call = tail call noalias i8* @calloc(i32 1, i32 %size)
+  %call1 = tail call i32 @strlen(i8* %call)
+  ret i32 %call1
+}
+
+define i32 @calloc_strlen_not_const_args(i32 %n, i32 %size) {
+; CHECK-LABEL: @calloc_strlen_not_const_args(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @calloc(i32 [[N:%.*]], i32 [[SIZE:%.*]])
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @strlen(i8* [[CALL]])
+; CHECK-NEXT:    ret i32 [[CALL1]]
+;
+  %call = tail call noalias i8* @calloc(i32 %n, i32 %size)
+  %call1 = tail call i32 @strlen(i8* %call)
+  ret i32 %call1
+}
+
+define i32 @malloc_strlen() {
+; CHECK-LABEL: @malloc_strlen(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @malloc(i32 10)
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 @strlen(i8* [[CALL]])
+; CHECK-NEXT:    ret i32 [[CALL1]]
+;
+  %call = tail call noalias i8* @malloc(i32 10)
+  %call1 = tail call i32 @strlen(i8* %call)
+  ret i32 %call1
+}
+
+define i8* @malloc_strcat(i8* %str2) {
+; CHECK-LABEL: @malloc_strcat(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @malloc(i32 10)
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i8* @strcat(i8* [[CALL]], i8* [[STR2:%.*]])
+; CHECK-NEXT:    ret i8* [[CALL1]]
+;
+  %call = tail call noalias i8* @malloc(i32 10)
+  %call1 = tail call i8* @strcat(i8* %call, i8* %str2)
+  ret i8* %call1
+}
+
+define i8* @calloc_strcat(i8* %str2) {
+; CHECK-LABEL: @calloc_strcat(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @calloc(i32 10, i32 1)
+; CHECK-NEXT:    [[STRCPY:%.*]] = call i8* @strcpy(i8* [[CALL]], i8* [[STR2:%.*]])
+; CHECK-NEXT:    ret i8* [[STRCPY]]
+;
+  %call = tail call noalias i8* @calloc(i32 10, i32 1)
+  %call1 = call i8* @strcat(i8* %call, i8* %str2)
+  ret i8* %call1
+}
+
+; strncat must stay: strncpy always writes exactly n bytes (zero padding), so
+; it could overflow a buffer that strncat would not.
+define i8* @calloc_strncat(i8* %str2) {
+; CHECK-LABEL: @calloc_strncat(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @calloc(i32 10, i32 1)
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i8* @strncat(i8* [[CALL]], i8* [[STR2:%.*]], i32 10)
+; CHECK-NEXT:    ret i8* [[CALL1]]
+;
+  %call = tail call noalias i8* @calloc(i32 10, i32 1)
+  %call1 = tail call i8* @strncat(i8* %call, i8* %str2, i32 10)
+  ret i8* %call1
+}
+
+define i8* @malloc_strncat(i8* %str2) {
+; CHECK-LABEL: @malloc_strncat(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @malloc(i32 10)
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i8* @strncat(i8* [[CALL]], i8* [[STR2:%.*]], i32 10)
+; CHECK-NEXT:    ret i8* [[CALL1]]
+;
+  %call = tail call noalias i8* @malloc(i32 10)
+  %call1 = tail call i8* @strncat(i8* %call, i8* %str2, i32 10)
+  ret i8* %call1
+}
+
+define i8* @calloc_strncat_not_const(i8* %str2, i32 %n) {
+; CHECK-LABEL: @calloc_strncat_not_const(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noalias i8* @calloc(i32 10, i32 1)
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call i8* @strncat(i8* [[CALL]], i8* [[STR2:%.*]], i32 [[N:%.*]])
+; CHECK-NEXT:    ret i8* [[CALL1]]
+;
+  %call = tail call noalias i8* @calloc(i32 10, i32 1)
+  %call1 = tail call i8* @strncat(i8* %call, i8* %str2, i32 %n)
+  ret i8* %call1
+}
+
+; The calloc'ed memory has been written to, so it is no longer known to hold
+; an empty string. (Hand-written checks.)
+define i32 @calloc_store_strlen() {
+; CHECK-LABEL: @calloc_store_strlen(
+; CHECK: store i8 97
+; CHECK: call i32 @strlen(
+;
+  %call = tail call noalias i8* @calloc(i32 10, i32 1)
+  store i8 97, i8* %call
+  %call1 = tail call i32 @strlen(i8* %call)
+  ret i32 %call1
+}