diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1365,6 +1365,10 @@ return nullptr; } + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass); + // If the char is variable but the input str and length are not we can turn // this memchr call into a simple bit field test. Of course this only works // when the return value is only checked against null. @@ -1375,7 +1379,7 @@ // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n'))) // != 0 // after bounds check. - if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) + if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) return nullptr; unsigned char Max = @@ -1387,8 +1391,34 @@ // FIXME: On a 64 bit architecture this prevents us from using the // interesting range of alpha ascii chars. We could do better by emitting // two bitfields or shifting the range by 64 if no lower chars are used. - if (!DL.fitsInLegalInteger(Max + 1)) - return nullptr; + if (!DL.fitsInLegalInteger(Max + 1)) { + // Build chain of ORs + // Transform: + // memchr("abcd", C, 4) != nullptr + // to: + // (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0 + std::string SortedStr = Str.str(); + llvm::sort(SortedStr); + // Compute the number of of non-contiguous ranges. + unsigned NonContRanges = 1; + for (size_t i = 1; i < SortedStr.size(); ++i) { + if (SortedStr[i] > SortedStr[i - 1] + 1) { + NonContRanges++; + } + } + + // Restrict this optimization to profitable cases with one or two range + // checks. + if (NonContRanges > 2) + return nullptr; + + SmallVector CharCompares; + for (char C : SortedStr) + CharCompares.push_back( + B.CreateICmpEQ(CharVal, ConstantInt::get(CharVal->getType(), C))); + + return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType()); + } // For the bit field use a power-of-2 type with at least 8 bits to avoid // creating unnecessary illegal types. diff --git a/llvm/test/Transforms/InstCombine/memchr-7.ll b/llvm/test/Transforms/InstCombine/memchr-7.ll --- a/llvm/test/Transforms/InstCombine/memchr-7.ll +++ b/llvm/test/Transforms/InstCombine/memchr-7.ll @@ -1,17 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -passes=instcombine -instcombine-infinite-loop-threshold=2 -S | FileCheck %s @.str = private unnamed_addr constant [27 x i8] c"abcdefghijklmnopqrstuvwxyz\00", align 1 @.str.1 = private unnamed_addr constant [2 x i8] c"\0D\0A", align 1 +@.str.2 = private unnamed_addr constant [10 x i8] c"abcdefmno\00", align 1 +@.str.3 = private unnamed_addr constant [10 x i8] c"abcijkmno\00", align 1 +@.str.4 = private unnamed_addr constant [7 x i8] c"mnabcc\00", align 1 declare ptr @strchr(ptr, i32) declare ptr @memchr(ptr, i32, i64) define zeroext i1 @strchr_to_memchr_n_equals_len(i32 %c) { ; CHECK-LABEL: @strchr_to_memchr_n_equals_len( -; CHECK-NEXT: [[MEMCHR:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 [[C:%.*]], i64 27) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[MEMCHR]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[C]], -97 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %call = tail call ptr @strchr(ptr nonnull dereferenceable(27) @.str, i32 %c) %cmp = icmp ne ptr %call, null @@ -33,9 +38,9 @@ define zeroext i1 @memchr_n_less_than_len(i32 %c) { ; CHECK-LABEL: @memchr_n_less_than_len( -; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 [[C:%.*]], i64 15) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[CALL]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -97 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 15 +; CHECK-NEXT: ret i1 [[TMP2]] ; %call = tail call ptr @memchr(ptr @.str, i32 %c, i64 15) %cmp = icmp ne ptr %call, null @@ -45,9 +50,11 @@ define zeroext i1 @memchr_n_more_than_len(i32 %c) { ; CHECK-LABEL: @memchr_n_more_than_len( -; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 [[C:%.*]], i64 30) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[CALL]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[C]], -97 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %call = tail call ptr @memchr(ptr @.str, i32 %c, i64 30) %cmp = icmp ne ptr %call, null @@ -103,3 +110,45 @@ %cmp = icmp eq ptr %call, null ret i1 %cmp } + +; Positive test - 2 non-contiguous ranges +define zeroext i1 @strchr_to_memchr_2_non_cont_ranges(i32 %c) { +; CHECK-LABEL: @strchr_to_memchr_2_non_cont_ranges( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -97 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 6 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[C]], -109 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]] +; CHECK-NEXT: ret i1 [[TMP5]] +; + %call = tail call ptr @memchr(ptr @.str.2, i32 %c, i64 9) + %cmp = icmp ne ptr %call, null + ret i1 %cmp +} + +; Positive test - 2 non-contiguous ranges with char duplication +define zeroext i1 @strchr_to_memchr_2_non_cont_ranges_char_dup(i32 %c) { +; CHECK-LABEL: @strchr_to_memchr_2_non_cont_ranges_char_dup( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -97 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[C]], -109 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 2 +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]] +; CHECK-NEXT: ret i1 [[TMP5]] +; + %call = tail call ptr @memchr(ptr @.str.4, i32 %c, i64 6) + %cmp = icmp ne ptr %call, null + ret i1 %cmp +} + +; Negative test - more than 2 non-contiguous ranges +define zeroext i1 @strchr_to_memchr_3_non_cont_ranges(i32 %c) { +; CHECK-LABEL: @strchr_to_memchr_3_non_cont_ranges( +; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str.3, i32 [[C:%.*]], i64 9) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[CALL]], null +; CHECK-NEXT: ret i1 [[CMP]] +; + %call = tail call ptr @memchr(ptr @.str.3, i32 %c, i64 9) + %cmp = icmp ne ptr %call, null + ret i1 %cmp +} diff --git a/llvm/test/Transforms/InstCombine/memchr.ll b/llvm/test/Transforms/InstCombine/memchr.ll --- a/llvm/test/Transforms/InstCombine/memchr.ll +++ b/llvm/test/Transforms/InstCombine/memchr.ll @@ -174,9 +174,9 @@ define i1 @test15(i32 %C) { ; CHECK-LABEL: @test15( -; CHECK-NEXT: [[DST:%.*]] = call ptr @memchr(ptr noundef nonnull dereferenceable(1) @negative, i32 [[C:%.*]], i32 3) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[DST]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-NEXT: ret i1 [[TMP2]] ; %dst = call ptr @memchr(ptr @negative, i32 %C, i32 3) %cmp = icmp ne ptr %dst, null diff --git a/llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll b/llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll deleted file mode 100644 --- a/llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll +++ /dev/null @@ -1,56 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='instcombine,simplifycfg' -S < %s | FileCheck %s - -@.str = private constant [7 x i8] c"abcdef\00", align 1 - -declare ptr @memchr(ptr noundef, i32 noundef, i64 noundef) -declare i32 @foo() - -define i32 @memchr_to_switch_br(i32 %a) { -; CHECK-LABEL: @memchr_to_switch_br( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef [[A:%.*]], i64 noundef 6) -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[CALL]], null -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[RETURN:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[CALL1:%.*]] = call i32 @foo() -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ -1, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[RETVAL_0]] -; -entry: - %call = call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %a, i64 noundef 6) #3 - %cmp.not = icmp eq ptr %call, null - br i1 %cmp.not, label %return, label %if.then - -if.then: ; preds = %entry - %call1 = call i32 @foo() #3 - br label %return - -return: ; preds = %entry, %if.then - %retval.0 = phi i32 [ %call1, %if.then ], [ -1, %entry ] - ret i32 %retval.0 -} - -define i1 @memchr_to_switch_eq_ret(i32 %c) { -; CHECK-LABEL: @memchr_to_switch_eq_ret( -; CHECK-NEXT: [[P:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef [[C:%.*]], i64 noundef 6) -; CHECK-NEXT: [[RET:%.*]] = icmp eq ptr [[P]], null -; CHECK-NEXT: ret i1 [[RET]] -; - %p = tail call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %c, i64 noundef 6) - %ret = icmp eq ptr %p, null - ret i1 %ret -} - -define i1 @memchr_to_switch_ne_ret(i32 %c) { -; CHECK-LABEL: @memchr_to_switch_ne_ret( -; CHECK-NEXT: [[P:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef [[C:%.*]], i64 noundef 6) -; CHECK-NEXT: [[RET:%.*]] = icmp ne ptr [[P]], null -; CHECK-NEXT: ret i1 [[RET]] -; - %p = tail call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %c, i64 noundef 6) - %ret = icmp ne ptr %p, null - ret i1 %ret -}