diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -901,6 +901,7 @@
 
 Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) {
   Value *SrcStr = CI->getArgOperand(0);
+  Value *Char = CI->getArgOperand(1);
   Value *Size = CI->getArgOperand(2);
   annotateNonNullAndDereferenceable(CI, 0, Size, DL);
   ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
@@ -923,7 +924,9 @@
   // scan the string, as reading past the end of it is undefined and we can just
   // return null if we don't find the char.
   Str = Str.substr(0, LenC->getZExtValue());
-
+  bool OptForSize = CI->getFunction()->hasOptSize() ||
+                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+                                                PGSOQueryType::IRPass);
   // If the char is variable but the input str and length are not we can turn
   // this memchr call into a simple bit field test. Of course this only works
   // when the return value is only checked against null.
@@ -934,18 +937,43 @@
   // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
   // != 0
   // after bounds check.
-  if (!CharC && !Str.empty() && isOnlyUsedInZeroEqualityComparison(CI)) {
+  if (!OptForSize && !CharC && !Str.empty() &&
+      isOnlyUsedInZeroEqualityComparison(CI)) {
     unsigned char Max =
         *std::max_element(reinterpret_cast<const unsigned char *>(Str.begin()),
                           reinterpret_cast<const unsigned char *>(Str.end()));
 
     // Make sure the bit field we're about to create fits in a register on the
     // target.
-    // FIXME: On a 64 bit architecture this prevents us from using the
-    // interesting range of alpha ascii chars. We could do better by emitting
-    // two bitfields or shifting the range by 64 if no lower chars are used.
-    if (!DL.fitsInLegalInteger(Max + 1))
-      return nullptr;
+    if (!DL.fitsInLegalInteger(Max + 1)) {
+      // FIXME: On a 64 bit architecture this prevents us from using the bit
+      // field for the interesting range of alpha ascii chars. We could do
+      // better by emitting two bitfields or shifting the range by 64 if no
+      // lower chars are used.
+      //
+      // Until then, build a chain of OR'ed equality comparisons instead:
+      // memchr("abcd", C, 4) != nullptr -> (C == 'a' | C == 'b' | C == 'c' |
+      // C == 'd') != 0
+      //
+      // NOTE(review): memchr compares against (unsigned char)C, but Char is
+      // compared at its full width here - confirm whether it has to be
+      // truncated or masked to 8 bits first.
+      SmallVector<Value *, 8> CharCompares;
+      // Iterate as unsigned char so that bytes >= 0x80 are zero-extended
+      // rather than sign-extended when widened to Char's type.
+      for (unsigned char C : Str)
+        CharCompares.push_back(
+            B.CreateICmpEQ(Char, ConstantInt::get(Char->getType(), C)));
+
+      // Str is known to be non-empty here, so element 0 exists.
+      Value *OrChain = CharCompares[0];
+      for (size_t i = 1; i < CharCompares.size(); i++)
+        OrChain = B.CreateOr(OrChain, CharCompares[i]);
+
+      // The i1 result is cast to the call's pointer type; the only users are
+      // comparisons against null, so non-zero stands for "found".
+      return B.CreateIntToPtr(OrChain, CI->getType());
+    }
 
     // For the bit field use a power-of-2 type with at least 8 bits to avoid
     // creating unnecessary illegal types.
diff --git a/llvm/test/Transforms/InstCombine/memchr-2.ll b/llvm/test/Transforms/InstCombine/memchr-2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/memchr-2.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@.str = private unnamed_addr constant [27 x i8] c"abcdefghijklmnopqrstuvwxyz\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"\0D\0A", align 1
+
+declare i8* @strchr(i8*, i32)
+declare i8* @memchr(i8*, i32, i64)
+
+define zeroext i1 @memchr_n_equals_len(i32 %c) {
+; CHECK-LABEL: @memchr_n_equals_len(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[C]], -97
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[TMP4]]
+;
+  %call = tail call i8* @strchr(i8* nonnull dereferenceable(27) getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 %c)
+  %cmp = icmp ne i8* %call, null
+  ret i1 %cmp
+}
+
+define zeroext i1 @memchr_n_equals_len2(i32 %c) {
+; CHECK-LABEL: @memchr_n_equals_len2(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[C:%.*]], 13
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[C]], 10
+; CHECK-NEXT:    [[TMP3:%.*]] = and i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i1 [[TMP3]]
+;
+  %call = tail call i8* @memchr(i8* nonnull dereferenceable(3) getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0), i32 %c, i64 2)
+  %cmp = icmp eq i8* %call, null
+  ret i1 %cmp
+}
+
+define zeroext i1 @memchr_n_less_than_len(i32 %c) {
+; CHECK-LABEL: @memchr_n_less_than_len(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C:%.*]], -97
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 15
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %call = tail call i8* @memchr(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 %c, i64 15)
+  %cmp = icmp ne i8* %call, null
+  ret i1 %cmp
+}
+
+
+define zeroext i1 @memchr_n_more_than_len(i32 %c) {
+; CHECK-LABEL: @memchr_n_more_than_len(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[C]], -97
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26
+; CHECK-NEXT:    [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[TMP4]]
+;
+  %call = tail call i8* @memchr(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 %c, i64 30)
+  %cmp = icmp ne i8* %call, null
+  ret i1 %cmp
+}
+
+; Negative test - no comparison with zero
+
+define i8* @memchr_no_zero_cmp(i32 %c) {
+; CHECK-LABEL: @memchr_no_zero_cmp(
+; CHECK-NEXT:    [[MEMCHR:%.*]] = call i8* @memchr(i8* nonnull dereferenceable(27) getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 [[C:%.*]], i64 27)
+; CHECK-NEXT:    ret i8* [[MEMCHR]]
+;
+  %call = tail call i8* @strchr(i8* nonnull dereferenceable(27) getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 %c)
+  ret i8* %call
+}
+
+define i8* @memchr_no_zero_cmp2(i32 %c) {
+; CHECK-LABEL: @memchr_no_zero_cmp2(
+; CHECK-NEXT:    [[MEMCHR:%.*]] = call i8* @memchr(i8* nonnull dereferenceable(3) getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0), i32 [[C:%.*]], i64 3)
+; CHECK-NEXT:    ret i8* [[MEMCHR]]
+;
+  %call = tail call i8* @strchr(i8* nonnull dereferenceable(3) getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0), i32 %c)
+  ret i8* %call
+}
+
+; Negative test - opt for size
+
+define zeroext i1 @memchr_n_equals_len_minsize(i32 %c) minsize {
+; CHECK-LABEL: @memchr_n_equals_len_minsize(
+; CHECK-NEXT:    [[MEMCHR:%.*]] = call i8* @memchr(i8* nonnull dereferenceable(27) getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 [[C:%.*]], i64 27)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8* [[MEMCHR]], null
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %call = tail call i8* @strchr(i8* nonnull dereferenceable(27) getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 %c)
+  %cmp = icmp ne i8* %call, null
+  ret i1 %cmp
+}
+
+define zeroext i1 @memchr_n_equals_len2_minsize(i32 %c) minsize {
+; CHECK-LABEL: @memchr_n_equals_len2_minsize(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @memchr(i8* nonnull dereferenceable(3) getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0), i32 [[C:%.*]], i64 2)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %call = tail call i8* @memchr(i8* nonnull dereferenceable(3) getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0), i32 %c, i64 2)
+  %cmp = icmp eq i8* %call, null
+  ret i1 %cmp
+}