Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -999,6 +999,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { Value *SrcStr = CI->getArgOperand(0); + Value *Char = CI->getArgOperand(1); Value *Size = CI->getArgOperand(2); if (isKnownNonZero(Size, DL)) annotateNonNullNoUndefBasedOnAccess(CI, 0); @@ -1089,6 +1090,10 @@ // From now on we need a constant length and constant array. return nullptr; + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass); + // If the char is variable but the input str and length are not we can turn // this memchr call into a simple bit field test. Of course this only works // when the return value is only checked against null. @@ -1099,7 +1104,7 @@ // memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n'))) // != 0 // after bounds check. - if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) + if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI)) return nullptr; unsigned char Max = @@ -1111,8 +1116,19 @@ // FIXME: On a 64 bit architecture this prevents us from using the // interesting range of alpha ascii chars. We could do better by emitting // two bitfields or shifting the range by 64 if no lower chars are used. - if (!DL.fitsInLegalInteger(Max + 1)) - return nullptr; + if (!DL.fitsInLegalInteger(Max + 1)) { + // Build chain of logical ORs + // Transform: + // memchr("abcd", C, 4) != nullptr + // to: + // (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0 + SmallVector CharCompares; + for (char C : Str) + CharCompares.push_back( + B.CreateICmpEQ(Char, ConstantInt::get(Char->getType(), C))); + + return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType()); + } // For the bit field use a power-of-2 type with at least 8 bits to avoid // creating unnecessary illegal types. Index: llvm/test/Transforms/InstCombine/memchr-7.ll =================================================================== --- llvm/test/Transforms/InstCombine/memchr-7.ll +++ llvm/test/Transforms/InstCombine/memchr-7.ll @@ -9,9 +9,11 @@ define zeroext i1 @strchr_to_memchr_n_equals_len(i32 %c) { ; CHECK-LABEL: @strchr_to_memchr_n_equals_len( -; CHECK-NEXT: [[MEMCHR:%.*]] = tail call i8* @memchr(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 [[C:%.*]], i64 27) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[MEMCHR]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[C]], -97 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %call = tail call i8* @strchr(i8* nonnull dereferenceable(27) getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 %c) %cmp = icmp ne i8* %call, null @@ -33,9 +35,9 @@ define zeroext i1 @memchr_n_less_than_len(i32 %c) { ; CHECK-LABEL: @memchr_n_less_than_len( -; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @memchr(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 [[C:%.*]], i64 15) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[CALL]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], -97 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 15 +; CHECK-NEXT: ret i1 [[TMP2]] ; %call = tail call i8* @memchr(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 %c, i64 15) %cmp = icmp ne i8* %call, null @@ -45,9 +47,11 @@ define zeroext i1 @memchr_n_more_than_len(i32 %c) { ; CHECK-LABEL: @memchr_n_more_than_len( -; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @memchr(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 [[C:%.*]], i64 30) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[CALL]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[C]], -97 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 26 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP1]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %call = tail call i8* @memchr(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str, i64 0, i64 0), i32 %c, i64 30) %cmp = icmp ne i8* %call, null Index: llvm/test/Transforms/InstCombine/memchr.ll =================================================================== --- llvm/test/Transforms/InstCombine/memchr.ll +++ llvm/test/Transforms/InstCombine/memchr.ll @@ -148,9 +148,12 @@ ; No 64 bits here define i1 @test12(i32 %C) { ; CHECK-LABEL: @test12( -; CHECK-NEXT: [[DST:%.*]] = call i8* @memchr(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @spaces, i32 0, i32 0), i32 [[C:%.*]], i32 3) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[DST]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 32 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[C]], 13 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[C]], 10 +; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[TMP3]] +; CHECK-NEXT: ret i1 [[TMP5]] ; %dst = call i8* @memchr(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @spaces, i64 0, i64 0), i32 %C, i32 3) %cmp = icmp ne i8* %dst, null @@ -183,9 +186,9 @@ define i1 @test15(i32 %C) { ; CHECK-LABEL: @test15( -; CHECK-NEXT: [[DST:%.*]] = call i8* @memchr(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([3 x i8], [3 x i8]* @negative, i32 0, i32 0), i32 [[C:%.*]], i32 3) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[DST]], null -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C:%.*]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-NEXT: ret i1 [[TMP2]] ; %dst = call i8* @memchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @negative, i64 0, i64 0), i32 %C, i32 3) %cmp = icmp ne i8* %dst, null Index: llvm/test/Transforms/PhaseOrdering/memchr-switch.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PhaseOrdering/memchr-switch.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='instcombine,simplifycfg' -S < %s | FileCheck %s + +@.str = private constant [7 x i8] c"qwerty\00", align 1 + +declare ptr @memchr(ptr noundef, i32 noundef, i64 noundef) +declare i32 @foo() + +define i32 @memchr_to_switch_br(i32 %a) { +; CHECK-LABEL: @memchr_to_switch_br( +; CHECK-NEXT: entry: +; CHECK-NEXT: switch i32 [[A:%.*]], label [[RETURN:%.*]] [ +; CHECK-NEXT: i32 121, label [[IF_THEN:%.*]] +; CHECK-NEXT: i32 119, label [[IF_THEN]] +; CHECK-NEXT: i32 116, label [[IF_THEN]] +; CHECK-NEXT: i32 114, label [[IF_THEN]] +; CHECK-NEXT: i32 113, label [[IF_THEN]] +; CHECK-NEXT: i32 101, label [[IF_THEN]] +; CHECK-NEXT: ] +; CHECK: if.then: +; CHECK-NEXT: [[CALL1:%.*]] = call i32 @foo() +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ -1, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[RETVAL_0]] +; +entry: + %call = call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %a, i64 noundef 6) #3 + %cmp.not = icmp eq ptr %call, null + br i1 %cmp.not, label %return, label %if.then + +if.then: ; preds = %entry + %call1 = call i32 @foo() #3 + br label %return + +return: ; preds = %entry, %if.then + %retval.0 = phi i32 [ %call1, %if.then ], [ -1, %entry ] + ret i32 %retval.0 +} + +; FIXME: SimplifyCFG is unable to convert ORs to switch +; See https://github.com/llvm/llvm-project/issues/56087 +define i1 @memchr_to_switch_eq_ret(i32 %c) { +; CHECK-LABEL: @memchr_to_switch_eq_ret( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 113 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[C]], 119 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[C]], 101 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[C]], 114 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[C]], 116 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[C]], 121 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[TMP6]] +; CHECK-NEXT: [[RET:%.*]] = xor i1 [[TMP11]], true +; CHECK-NEXT: ret i1 [[RET]] +; + %p = tail call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %c, i64 noundef 6) + %ret = icmp eq ptr %p, null + ret i1 %ret +} + +define i1 @memchr_to_switch_ne_ret(i32 %c) { +; CHECK-LABEL: @memchr_to_switch_ne_ret( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[C:%.*]], 113 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[C]], 119 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[C]], 101 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[C]], 114 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[C]], 116 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[C]], 121 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP10]], [[TMP6]] +; CHECK-NEXT: ret i1 [[TMP11]] +; + %p = tail call ptr @memchr(ptr noundef nonnull @.str, i32 noundef %c, i64 noundef 6) + %ret = icmp ne ptr %p, null + ret i1 %ret +}