Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -295,31 +295,69 @@
   return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
 }
 
+// Helper to transform memchr(S, C, N) == S to N && *S == C and, when
+// NBytes is null, strchr(S, C) to *S == C.  A precondition of the function
+// is that either S is dereferenceable or the value of N is nonzero.
+static Value *memChrToCharCompare(CallInst *CI, Value *NBytes,
+                                  IRBuilderBase &B, const DataLayout &DL) {
+  Value *Src = CI->getArgOperand(0);
+  Value *CharVal = CI->getArgOperand(1);
+
+  // Fold memchr(A, C, N) == A to N && *A == C.
+  Type *CharTy = B.getInt8Ty();
+  Value *Char0 = B.CreateLoad(CharTy, Src);
+  CharVal = B.CreateTrunc(CharVal, CharTy);
+  Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp");
+
+  if (NBytes) {
+    Value *Zero = ConstantInt::get(NBytes->getType(), 0);
+    Value *And = B.CreateICmpNE(NBytes, Zero);
+    Cmp = B.CreateLogicalAnd(And, Cmp);
+  }
+
+  Value *NullPtr = Constant::getNullValue(CI->getType());
+  return B.CreateSelect(Cmp, Src, NullPtr);
+}
+
 Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
-  Function *Callee = CI->getCalledFunction();
-  FunctionType *FT = Callee->getFunctionType();
   Value *SrcStr = CI->getArgOperand(0);
+  Value *CharVal = CI->getArgOperand(1);
   annotateNonNullNoUndefBasedOnAccess(CI, 0);
 
+  if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+    return memChrToCharCompare(CI, nullptr, B, DL);
+
   // If the second operand is non-constant, see if we can compute the length
   // of the input string and turn this into memchr.
-  ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+  ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
   if (!CharC) {
     uint64_t Len = GetStringLength(SrcStr);
     if (Len)
       annotateDereferenceableBytes(CI, 0, Len);
     else
       return nullptr;
+
+    Function *Callee = CI->getCalledFunction();
+    FunctionType *FT = Callee->getFunctionType();
     if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
       return nullptr;
 
     return copyFlags(
         *CI,
-        emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+        emitMemChr(SrcStr, CharVal, // include nul.
                    ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len),
                    B, DL, TLI));
   }
 
+  if (CharC->isZero()) {
+    Value *NullPtr = Constant::getNullValue(CI->getType());
+    if (isOnlyUsedInEqualityComparison(CI, NullPtr))
+      // Pre-empt the transformation to strlen below and fold
+      // strchr(A, '\0') == null to false.
+      return B.CreateIntToPtr(B.getTrue(), CI->getType());
+  }
+
   // Otherwise, the character is a constant, see if the first argument is
   // a string literal.  If so, we can constant fold.
   StringRef Str;
@@ -1014,8 +1052,12 @@
 Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
   Value *SrcStr = CI->getArgOperand(0);
   Value *Size = CI->getArgOperand(2);
-  if (isKnownNonZero(Size, DL))
+
+  if (isKnownNonZero(Size, DL)) {
     annotateNonNullNoUndefBasedOnAccess(CI, 0);
+    if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+      return memChrToCharCompare(CI, Size, B, DL);
+  }
 
   Value *CharVal = CI->getArgOperand(1);
   ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
@@ -1105,9 +1147,16 @@
     return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2");
   }
 
-  if (!LenC)
+  if (!LenC) {
+    if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+      // S is dereferenceable so it's safe to load from it and fold
+      // memchr(S, C, N) == S to N && *S == C for any C and N.
+      // TODO: This is safe even for nonconstant S.
+      return memChrToCharCompare(CI, Size, B, DL);
+
+    // From now on we need a constant length and constant array.
     return nullptr;
+  }
 
   // If the char is variable but the input str and length are not we can turn
   // this memchr call into a simple bit field test. Of course this only works
Index: llvm/test/Transforms/InstCombine/memchr-11.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/memchr-11.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that the result of memchr calls used in equality expressions
+; with either the first argument or null are optimally folded.
+
+declare i8* @memchr(i8*, i32, i64)
+
+
+@a5 = constant [5 x i8] c"12345"
+
+; Fold memchr(a5, c, 5) == a5 to *a5 == c.
+
+define i1 @fold_memchr_a_c_5_eq_a(i32 %c) {
+; CHECK-LABEL: @fold_memchr_a_c_5_eq_a(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], 49
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @memchr(i8* %p, i32 %c, i64 5)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Fold memchr(a5, c, n) == a5 to n && *a5 == c.  Unlike the case where
+; the first argument is an arbitrary (possibly past-the-end) pointer,
+; this is safe because a5 is dereferenceable.
+
+define i1 @fold_memchr_a_c_n_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memchr_a_c_n_eq_a(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], 49
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[N:%.*]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i1 [[CHAR0CMP]], i1 false
+; CHECK-NEXT:    ret i1 [[TMP3]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @memchr(i8* %p, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Do not fold memchr(a5 + i, c, n).
+
+define i1 @call_memchr_api_c_n_eq_a(i64 %i, i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memchr_api_c_n_eq_a(
+; CHECK-NEXT:    [[P:%.*]] = getelementptr [5 x i8], [5 x i8]* @a5, i64 0, i64 [[I:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = call i8* @memchr(i8* [[P]], i32 [[C:%.*]], i64 [[N:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[Q]], [[P]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i64 0, i64 %i
+  %q = call i8* @memchr(i8* %p, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Fold memchr(s, c, 15) == s to *s == c.
+
+define i1 @fold_memchr_s_c_15_eq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @fold_memchr_s_c_15_eq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = call i8* @memchr(i8* %s, i32 %c, i64 15)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Fold memchr(s, c, 17) != s to *s != c.
+
+define i1 @fold_memchr_s_c_17_neq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @fold_memchr_s_c_17_neq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp ne i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = call i8* @memchr(i8* %s, i32 %c, i64 17)
+  %cmp = icmp ne i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Fold memchr(s, c, n) == s to *s == c for nonzero n.
+
+define i1 @fold_memchr_s_c_nz_eq_s(i8* %s, i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memchr_s_c_nz_eq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %nz = or i64 %n, 1
+  %p = call i8* @memchr(i8* %s, i32 %c, i64 %nz)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; But do not fold memchr(s, c, n) as above when n might be zero.  The fold
+; is equivalent to N && *S == C only with a short-circuiting AND; without
+; it the load could read a byte just past the end of an array.
+
+define i1 @call_memchr_s_c_n_eq_s(i8* %s, i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memchr_s_c_n_eq_s(
+; CHECK-NEXT:    [[P:%.*]] = call i8* @memchr(i8* [[S:%.*]], i32 [[C:%.*]], i64 [[N:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[P]], [[S]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = call i8* @memchr(i8* %s, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
Index: llvm/test/Transforms/InstCombine/memrchr-8.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/memrchr-8.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that the result of memrchr calls used in equality expressions
+; with the first argument aren't folded like the corresponding calls
+; to memchr might be.
+; Folding of equality expressions with the first argument plus the bound
+; minus 1, i.e., memrchr(S, C, N) == S + N - 1 to N && S[N - 1] == C,
+; is not implemented.
+
+declare i8* @memrchr(i8*, i32, i64)
+
+
+@a5 = constant [5 x i8] c"12345";
+
+; Do not fold memrchr(a5, c, 9) == a5.  The corresponding call to memchr
+; is folded, so this test verifies that the memrchr folder doesn't make
+; the wrong assumption.  The bound of 9 tries to avoid having to adjust
+; the test if the call is folded into a series of ORs as in D128011.
+
+define i1 @call_memrchr_a_c_9_eq_a(i32 %c) {
+; CHECK-LABEL: @call_memrchr_a_c_9_eq_a(
+; CHECK-NEXT:    [[Q:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(9) getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i32 [[C:%.*]], i64 9)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[Q]], getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0)
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @memrchr(i8* %p, i32 %c, i64 9)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Do not fold memrchr(a5, c, n).
+
+define i1 @call_memrchr_a_c_n_eq_a(i32 %c, i64 %n) {
+; CHECK-LABEL: @call_memrchr_a_c_n_eq_a(
+; CHECK-NEXT:    [[Q:%.*]] = call i8* @memrchr(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i32 [[C:%.*]], i64 [[N:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[Q]], getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0)
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @memrchr(i8* %p, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
+
+
+; Do not fold memrchr(s, c, 17).
+
+define i1 @call_memrchr_s_c_17_eq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @call_memrchr_s_c_17_eq_s(
+; CHECK-NEXT:    [[P:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(17) [[S:%.*]], i32 [[C:%.*]], i64 17)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[P]], [[S]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = call i8* @memrchr(i8* %s, i32 %c, i64 17)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Do not fold memrchr(s, c, 7).
+
+define i1 @call_memrchr_s_c_9_neq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @call_memrchr_s_c_9_neq_s(
+; CHECK-NEXT:    [[P:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(7) [[S:%.*]], i32 [[C:%.*]], i64 7)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8* [[P]], [[S]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = call i8* @memrchr(i8* %s, i32 %c, i64 7)
+  %cmp = icmp ne i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Do not fold memrchr(s, c, n).
+
+define i1 @fold_memrchr_s_c_n_eq_s(i8* %s, i32 %c, i64 %n) {
+; CHECK-LABEL: @fold_memrchr_s_c_n_eq_s(
+; CHECK-NEXT:    [[P:%.*]] = call i8* @memrchr(i8* [[S:%.*]], i32 [[C:%.*]], i64 [[N:%.*]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[P]], [[S]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %p = call i8* @memrchr(i8* %s, i32 %c, i64 %n)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
Index: llvm/test/Transforms/InstCombine/strchr-4.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/strchr-4.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+;
+; Verify that the result of strchr calls used in equality expressions
+; with either the first argument or null are optimally folded.
+
+declare i8* @strchr(i8*, i32)
+
+
+; Fold strchr(s, c) == s to *s == c.
+
+define i1 @fold_strchr_s_c_eq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @fold_strchr_s_c_eq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = call i8* @strchr(i8* %s, i32 %c)
+  %cmp = icmp eq i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Fold strchr(s, c) != s to *s != c.
+
+define i1 @fold_strchr_s_c_neq_s(i8* %s, i32 %c) {
+; CHECK-LABEL: @fold_strchr_s_c_neq_s(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, i8* [[S:%.*]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp ne i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = call i8* @strchr(i8* %s, i32 %c)
+  %cmp = icmp ne i8* %p, %s
+  ret i1 %cmp
+}
+
+
+; Fold strchr(s, '\0') == null to false.  (A string must be nul-terminated,
+; otherwise the call would read past the end of the array.)
+
+define i1 @fold_strchr_s_nul_eqz(i8* %s) {
+; CHECK-LABEL: @fold_strchr_s_nul_eqz(
+; CHECK-NEXT:    ret i1 false
+;
+  %p = call i8* @strchr(i8* %s, i32 0)
+  %cmp = icmp eq i8* %p, null
+  ret i1 %cmp
+}
+
+
+; Fold strchr(s, '\0') != null to true.
+
+define i1 @fold_strchr_s_nul_nez(i8* %s) {
+; CHECK-LABEL: @fold_strchr_s_nul_nez(
+; CHECK-NEXT:    ret i1 true
+;
+  %p = call i8* @strchr(i8* %s, i32 0)
+  %cmp = icmp ne i8* %p, null
+  ret i1 %cmp
+}
+
+
+@a5 = constant [5 x i8] c"12345";
+
+; Fold strchr(a5, c) == a5 to *a5 == c.
+
+define i1 @fold_strchr_a_c_eq_a(i32 %c) {
+; CHECK-LABEL: @fold_strchr_a_c_eq_a(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
+; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp eq i8 [[TMP1]], 49
+; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
+;
+  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
+  %q = call i8* @strchr(i8* %p, i32 %c)
+  %cmp = icmp eq i8* %q, %p
+  ret i1 %cmp
+}
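
A possible follow-up test, not part of the patch above: strchr-4.ll exercises the == form of strchr against the constant array @a5 but not the != form, which the same memChrToCharCompare fold should reduce to *a5 != c. The sketch below is hypothetical; the test name is mine, and the CHECK lines are a guess at what utils/update_test_checks.py would regenerate (they mirror @fold_strchr_s_c_neq_s and @fold_strchr_a_c_eq_a), so they should be re-derived with the script before being relied on.

; Hypothetical extra test (not in the patch): fold strchr(a5, c) != a5
; to *a5 != c, the inequality counterpart of @fold_strchr_a_c_eq_a.

define i1 @fold_strchr_a_c_neq_a(i32 %c) {
; CHECK-LABEL: @fold_strchr_a_c_neq_a(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8
; CHECK-NEXT:    [[CHAR0CMP:%.*]] = icmp ne i8 [[TMP1]], 49
; CHECK-NEXT:    ret i1 [[CHAR0CMP]]
;
  %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0
  %q = call i8* @strchr(i8* %p, i32 %c)
  %cmp = icmp ne i8* %q, %p
  ret i1 %cmp
}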