Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1365,6 +1365,10 @@ return nullptr; } + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass); + // If the char is variable but the input str and length are not we can turn // this memchr call into a simple bit field test. Of course this only works // when the return value is only checked against null. @@ -1375,7 +1379,7 @@ // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n'))) // != 0 // after bounds check. - if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) + if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) return nullptr; unsigned char Max = @@ -1387,8 +1391,33 @@ // FIXME: On a 64 bit architecture this prevents us from using the // interesting range of alpha ascii chars. We could do better by emitting // two bitfields or shifting the range by 64 if no lower chars are used. - if (!DL.fitsInLegalInteger(Max + 1)) - return nullptr; + if (!DL.fitsInLegalInteger(Max + 1)) { + // Build chain of logical ORs + // Transform: + // memchr("abcd", C, 4) != nullptr + // to: + // (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0 + std::string SortedStr = Str.str(); + llvm::sort(SortedStr); + // Compute the number of of non-contiguous ranges. + unsigned NonContRanges = 1; + for (size_t i = 1; i < SortedStr.size(); ++i) { + if (SortedStr[i] > SortedStr[i - 1] + 1) { + NonContRanges++; + } + } + + // Restrict this optimization to profitable cases with one or two range checks. + if (NonContRanges > 2) + return nullptr; + + SmallVector CharCompares; + for (char C : SortedStr) + CharCompares.push_back( + B.CreateICmpEQ(CharVal, ConstantInt::get(CharVal->getType(), C))); + + return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType()); + } // For the bit field use a power-of-2 type with at least 8 bits to avoid // creating unnecessary illegal types. Index: llvm/test/Transforms/InstCombine/memchr-7.ll =================================================================== --- llvm/test/Transforms/InstCombine/memchr-7.ll +++ llvm/test/Transforms/InstCombine/memchr-7.ll @@ -9,9 +9,11 @@ define zeroext i1 @strchr_to_memchr_n_equals_len(i32 %c) { ; CHECK-LABEL: @strchr_to_memchr_n_equals_len( -; CHECK-NEXT: [[MEMCHR:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 [[C:%.*]], i64 27) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[MEMCHR]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[C]], -97 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %call = tail call ptr @strchr(ptr nonnull dereferenceable(27) @.str, i32 %c) %cmp = icmp ne ptr %call, null @@ -33,9 +35,9 @@ define zeroext i1 @memchr_n_less_than_len(i32 %c) { ; CHECK-LABEL: @memchr_n_less_than_len( -; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 [[C:%.*]], i64 15) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[CALL]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -97 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 15 +; CHECK-NEXT: ret i1 [[TMP2]] ; %call = tail call ptr @memchr(ptr @.str, i32 %c, i64 15) %cmp = icmp ne ptr %call, null @@ -45,9 +47,11 @@ define zeroext i1 @memchr_n_more_than_len(i32 %c) { ; CHECK-LABEL: @memchr_n_more_than_len( -; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 [[C:%.*]], i64 30) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[CALL]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[C]], -97 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %call = tail call ptr @memchr(ptr @.str, i32 %c, i64 30) %cmp = icmp ne ptr %call, null Index: llvm/test/Transforms/InstCombine/memchr.ll =================================================================== --- llvm/test/Transforms/InstCombine/memchr.ll +++ llvm/test/Transforms/InstCombine/memchr.ll @@ -174,9 +174,9 @@ define i1 @test15(i32 %C) { ; CHECK-LABEL: @test15( -; CHECK-NEXT: [[DST:%.*]] = call ptr @memchr(ptr noundef nonnull dereferenceable(1) @negative, i32 [[C:%.*]], i32 3) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[DST]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-NEXT: ret i1 [[TMP2]] ; %dst = call ptr @memchr(ptr @negative, i32 %C, i32 3) %cmp = icmp ne ptr %dst, null Index: llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll +++ llvm/test/Transforms/PhaseOrdering/memchr-expansion.ll @@ -6,12 +6,12 @@ declare ptr @memchr(ptr noundef, i32 noundef, i64 noundef) declare i32 @foo() -define i32 @memchr_to_switch_br(i32 %a) { -; CHECK-LABEL: @memchr_to_switch_br( +define i32 @memchr_to_rangecheck_br(i32 %a) { +; CHECK-LABEL: @memchr_to_rangecheck_br( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef [[A:%.*]], i64 noundef 6) -; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[CALL]], null -; CHECK-NEXT: br i1 [[CMP_NOT]], label [[RETURN:%.*]], label [[IF_THEN:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[A:%.*]], -103 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], -6 +; CHECK-NEXT: br i1 [[TMP1]], label [[RETURN:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[CALL1:%.*]] = call i32 @foo() ; CHECK-NEXT: br label [[RETURN]] @@ -33,22 +33,22 @@ ret i32 %retval.0 } -define i1 @memchr_to_switch_eq_ret(i32 %c) { -; CHECK-LABEL: @memchr_to_switch_eq_ret( -; CHECK-NEXT: [[P:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef [[C:%.*]], i64 noundef 6) -; CHECK-NEXT: [[RET:%.*]] = icmp eq ptr [[P]], null -; CHECK-NEXT: ret i1 [[RET]] +define i1 @memchr_to_rangecheck_eq_ret(i32 %c) { +; CHECK-LABEL: @memchr_to_rangecheck_eq_ret( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -103 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -6 +; CHECK-NEXT: ret i1 [[TMP2]] ; %p = tail call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %c, i64 noundef 6) %ret = icmp eq ptr %p, null ret i1 %ret } -define i1 @memchr_to_switch_ne_ret(i32 %c) { -; CHECK-LABEL: @memchr_to_switch_ne_ret( -; CHECK-NEXT: [[P:%.*]] = tail call ptr @memchr(ptr noundef nonnull dereferenceable(1) @.str, i32 noundef [[C:%.*]], i64 noundef 6) -; CHECK-NEXT: [[RET:%.*]] = icmp ne ptr [[P]], null -; CHECK-NEXT: ret i1 [[RET]] +define i1 @memchr_to_rangecheck_ne_ret(i32 %c) { +; CHECK-LABEL: @memchr_to_rangecheck_ne_ret( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -97 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 6 +; CHECK-NEXT: ret i1 [[TMP2]] ; %p = tail call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %c, i64 noundef 6) %ret = icmp ne ptr %p, null