diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -428,6 +428,12 @@ return nullptr; } +// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant +// arrays LHS and RHS and nonconstant Size. +static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS, + Value *Size, bool StrNCmp, + IRBuilderBase &B, const DataLayout &DL); + Value *LibCallSimplifier::optimizeStrNCmp(CallInst *CI, IRBuilderBase &B) { Value *Str1P = CI->getArgOperand(0); Value *Str2P = CI->getArgOperand(1); @@ -442,7 +448,7 @@ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(Size)) Length = LengthArg->getZExtValue(); else - return nullptr; + return optimizeMemCmpVarSize(CI, Str1P, Str2P, Size, true, B, DL); if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -1155,11 +1161,11 @@ CI->getType()); } -// Optimize a memcmp call CI with constant arrays LHS and RHS and either -// nonconstant Size or constant size known to be in bounds. +// Optimize a memcmp or, when StrNCmp is true, strncmp call CI with constant +// arrays LHS and RHS and nonconstant Size. static Value *optimizeMemCmpVarSize(CallInst *CI, Value *LHS, Value *RHS, - Value *Size, IRBuilderBase &B, - const DataLayout &DL) { + Value *Size, bool StrNCmp, + IRBuilderBase &B, const DataLayout &DL) { if (LHS == RHS) // memcmp(s,s,x) -> 0 return Constant::getNullValue(CI->getType()); @@ -1173,30 +1179,29 @@ // N <= Pos ? 0 : (A < B ? -1 : B < A ? +1 : 0) // where Pos is the first mismatch between A and B, determined below. 
+ uint64_t Pos = 0; Value *Zero = ConstantInt::get(CI->getType(), 0); - - uint64_t MinSize = std::min(LStr.size(), RStr.size()); - for (uint64_t Pos = 0; Pos < MinSize; ++Pos) { - if (LStr[Pos] != RStr[Pos]) { - Value *MaxSize = ConstantInt::get(Size->getType(), Pos); - Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize); - typedef unsigned char UChar; - int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1; - Value *Res = ConstantInt::get(CI->getType(), IRes); - return B.CreateSelect(Cmp, Zero, Res); + for (uint64_t MinSize = std::min(LStr.size(), RStr.size()); ; ++Pos) { + if (Pos == MinSize || + (StrNCmp && (LStr[Pos] == '\0' && RStr[Pos] == '\0'))) { + // One array is a leading part of the other of equal or greater + // size, or for strncmp, the arrays are equal strings. + // Fold the result to zero. Size is assumed to be in bounds, since + // otherwise the call would be undefined. + return Zero; } - } - if (auto *SizeC = dyn_cast<ConstantInt>(Size)) - if (MinSize < SizeC->getZExtValue()) - // Fail if the bound happens to be constant and excessive and - // let sanitizers catch it. - return nullptr; + if (LStr[Pos] != RStr[Pos]) + break; + } - // One array is a leading part of the other of equal or greater size. - // Fold the result to zero. Nonconstant size is assumed to be in bounds, - // since otherwise the call would be undefined. - return Zero; + // Normalize the result. + typedef unsigned char UChar; + int IRes = UChar(LStr[Pos]) < UChar(RStr[Pos]) ? -1 : 1; + Value *MaxSize = ConstantInt::get(Size->getType(), Pos); + Value *Cmp = B.CreateICmp(ICmpInst::ICMP_ULE, Size, MaxSize); + Value *Res = ConstantInt::get(CI->getType(), IRes); + return B.CreateSelect(Cmp, Zero, Res); } // Optimize a memcmp call CI with constant size Len. 
@@ -1265,7 +1270,7 @@ annotateNonNullAndDereferenceable(CI, {0, 1}, Size, DL); - if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, B, DL)) + if (Value *Res = optimizeMemCmpVarSize(CI, LHS, RHS, Size, false, B, DL)) return Res; // Handle constant Size. diff --git a/llvm/test/Transforms/InstCombine/memcmp-4.ll b/llvm/test/Transforms/InstCombine/memcmp-4.ll --- a/llvm/test/Transforms/InstCombine/memcmp-4.ll +++ b/llvm/test/Transforms/InstCombine/memcmp-4.ll @@ -17,14 +17,14 @@ ; value (analogous to strncmp) is safer than letting a SIMD library ; implementation return a bogus value. -define void @fold_memcmp_too_big(i32* %pcmp) { -; BE-LABEL: @fold_memcmp_too_big( +define void @fold_memcmp_mismatch_too_big(i32* %pcmp) { +; BE-LABEL: @fold_memcmp_mismatch_too_big( ; BE-NEXT: store i32 -1, i32* [[PCMP:%.*]], align 4 ; BE-NEXT: [[PSTOR_CB:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 ; BE-NEXT: store i32 1, i32* [[PSTOR_CB]], align 4 ; BE-NEXT: ret void ; -; LE-LABEL: @fold_memcmp_too_big( +; LE-LABEL: @fold_memcmp_mismatch_too_big( ; LE-NEXT: store i32 -1, i32* [[PCMP:%.*]], align 4 ; LE-NEXT: [[PSTOR_CB:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 ; LE-NEXT: store i32 1, i32* [[PSTOR_CB]], align 4 @@ -47,23 +47,21 @@ } -; Don't fold calls with excessive byte counts of arrays with the same bytes. +; Fold even calls with excessive byte counts of arrays with matching bytes. +; Like in the instances above, this is preferable to letting the undefined +; calls take place, although it does prevent sanitizers from detecting them. 
-define void @call_memcmp_too_big(i32* %pcmp) { -; BE-LABEL: @call_memcmp_too_big( -; BE-NEXT: [[CMP_AB_9:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(9) bitcast ([4 x i16]* @ia16a to i8*), i8* noundef nonnull dereferenceable(9) bitcast ([5 x i16]* @ia16b to i8*), i64 9) -; BE-NEXT: store i32 [[CMP_AB_9]], i32* [[PCMP:%.*]], align 4 -; BE-NEXT: [[CMP_AB_M1:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(18446744073709551615) bitcast ([4 x i16]* @ia16a to i8*), i8* noundef nonnull dereferenceable(18446744073709551615) bitcast ([5 x i16]* @ia16b to i8*), i64 -1) +define void @fold_memcmp_match_too_big(i32* %pcmp) { +; BE-LABEL: @fold_memcmp_match_too_big( +; BE-NEXT: store i32 0, i32* [[PCMP:%.*]], align 4 ; BE-NEXT: [[PSTOR_AB_M1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 -; BE-NEXT: store i32 [[CMP_AB_M1]], i32* [[PSTOR_AB_M1]], align 4 +; BE-NEXT: store i32 0, i32* [[PSTOR_AB_M1]], align 4 ; BE-NEXT: ret void ; -; LE-LABEL: @call_memcmp_too_big( -; LE-NEXT: [[CMP_AB_9:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(9) bitcast ([4 x i16]* @ia16a to i8*), i8* noundef nonnull dereferenceable(9) bitcast ([5 x i16]* @ia16b to i8*), i64 9) -; LE-NEXT: store i32 [[CMP_AB_9]], i32* [[PCMP:%.*]], align 4 -; LE-NEXT: [[CMP_AB_M1:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(18446744073709551615) bitcast ([4 x i16]* @ia16a to i8*), i8* noundef nonnull dereferenceable(18446744073709551615) bitcast ([5 x i16]* @ia16b to i8*), i64 -1) +; LE-LABEL: @fold_memcmp_match_too_big( +; LE-NEXT: store i32 0, i32* [[PCMP:%.*]], align 4 ; LE-NEXT: [[PSTOR_AB_M1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 -; LE-NEXT: store i32 [[CMP_AB_M1]], i32* [[PSTOR_AB_M1]], align 4 +; LE-NEXT: store i32 0, i32* [[PSTOR_AB_M1]], align 4 ; LE-NEXT: ret void ; %p0 = getelementptr [4 x i16], [4 x i16]* @ia16a, i64 0, i64 0 diff --git a/llvm/test/Transforms/InstCombine/strncmp-5.ll b/llvm/test/Transforms/InstCombine/strncmp-5.ll new file 
mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/strncmp-5.ll @@ -0,0 +1,357 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; +; Exercise folding of strncmp calls with constant arrays and nonconstant +; sizes. + +declare i32 @strncmp(i8*, i8*, i64) + +@ax = external constant [8 x i8] +@a01230123 = constant [8 x i8] c"01230123" +@b01230123 = constant [8 x i8] c"01230123" +@c01230129 = constant [8 x i8] c"01230129" +@d9123_12 = constant [7 x i8] c"9123\0012" +@e9123_34 = constant [7 x i8] c"9123\0034" + + +; Exercise strncmp(A, B, N) folding of arrays with the same bytes. + +define void @fold_strncmp_a_b_n(i32* %pcmp, i64 %n) { +; CHECK-LABEL: @fold_strncmp_a_b_n( +; CHECK-NEXT: store i32 0, i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[N:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32 +; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[S0_1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[S0_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2 +; CHECK-NEXT: store i32 [[TMP4]], i32* [[S0_2]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[S0_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3 +; CHECK-NEXT: store i32 [[TMP6]], i32* [[S0_3]], align 4 +; CHECK-NEXT: [[S0_4:%.*]] = getelementptr i32, i32* [[PCMP]], i64 4 +; CHECK-NEXT: store i32 0, i32* [[S0_4]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[S0_5:%.*]] = getelementptr i32, i32* [[PCMP]], i64 5 +; CHECK-NEXT: store i32 [[TMP8]], i32* [[S0_5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; 
CHECK-NEXT: [[S5_0:%.*]] = getelementptr i32, i32* [[PCMP]], i64 6 +; CHECK-NEXT: store i32 [[TMP10]], i32* [[S5_0]], align 4 +; CHECK-NEXT: ret void +; + + %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0 + + %q0 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 0 + %q1 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 1 + %q2 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 2 + %q3 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 3 + %q4 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 4 + %q5 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 5 + + ; Fold strncmp(a, b, n) to 0. + %c0_0 = call i32 @strncmp(i8* %p0, i8* %q0, i64 %n) + %s0_0 = getelementptr i32, i32* %pcmp, i64 0 + store i32 %c0_0, i32* %s0_0 + + ; Fold strncmp(a, b + 1, n) to N != 0 ? -1 : 0. + %c0_1 = call i32 @strncmp(i8* %p0, i8* %q1, i64 %n) + %s0_1 = getelementptr i32, i32* %pcmp, i64 1 + store i32 %c0_1, i32* %s0_1 + + ; Fold strncmp(a, b + 2, n) to N != 0 ? -1 : 0. + %c0_2 = call i32 @strncmp(i8* %p0, i8* %q2, i64 %n) + %s0_2 = getelementptr i32, i32* %pcmp, i64 2 + store i32 %c0_2, i32* %s0_2 + + ; Fold strncmp(a, b + 3, n) to N != 0 ? -1 : 0. + %c0_3 = call i32 @strncmp(i8* %p0, i8* %q3, i64 %n) + %s0_3 = getelementptr i32, i32* %pcmp, i64 3 + store i32 %c0_3, i32* %s0_3 + + ; Fold strncmp(a, b + 4, n) to 0. + %c0_4 = call i32 @strncmp(i8* %p0, i8* %q4, i64 %n) + %s0_4 = getelementptr i32, i32* %pcmp, i64 4 + store i32 %c0_4, i32* %s0_4 + + ; Fold strncmp(a, b + 5, n) to N != 0 ? -1 : 0. + %c0_5 = call i32 @strncmp(i8* %p0, i8* %q5, i64 %n) + %s0_5 = getelementptr i32, i32* %pcmp, i64 5 + store i32 %c0_5, i32* %s0_5 + + ; Fold strncmp(b + 5, a, n) to N != 0 ? +1 : 0. + %c5_0 = call i32 @strncmp(i8* %q5, i8* %p0, i64 %n) + %s5_0 = getelementptr i32, i32* %pcmp, i64 6 + store i32 %c5_0, i32* %s5_0 + + ret void +} + +; Verify that a strncmp() call involving a constant array with unknown +; contents is not folded. 
+ +define void @call_strncmp_a_ax_n(i32* %pcmp, i64 %n) { +; CHECK-LABEL: @call_strncmp_a_ax_n( +; CHECK-NEXT: [[C0_0:%.*]] = call i32 @strncmp(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @a01230123, i64 0, i64 0), i8* getelementptr inbounds ([8 x i8], [8 x i8]* @ax, i64 0, i64 0), i64 [[N:%.*]]) +; CHECK-NEXT: store i32 [[C0_0]], i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: ret void +; + + %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0 + %q0 = getelementptr [8 x i8], [8 x i8]* @ax, i64 0, i64 0 + + ; Do not fold strncmp(a, ax, n). + %c0_0 = call i32 @strncmp(i8* %p0, i8* %q0, i64 %n) + %s0_0 = getelementptr i32, i32* %pcmp, i64 0 + store i32 %c0_0, i32* %s0_0 + + ret void +} + + +; Exercise strncmp(A, C, N) folding of arrays with the same leading bytes +; but a difference in the trailing byte. + +define void @fold_strncmp_a_c_n(i32* %pcmp, i64 %n) { +; CHECK-LABEL: @fold_strncmp_a_c_n( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[N:%.*]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 +; CHECK-NEXT: store i32 [[TMP4]], i32* [[S0_1]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[S0_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2 +; CHECK-NEXT: store i32 [[TMP6]], i32* [[S0_2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[S0_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3 +; CHECK-NEXT: store i32 [[TMP8]], i32* [[S0_3]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i32 +; CHECK-NEXT: [[S0_4:%.*]] = getelementptr i32, i32* [[PCMP]], i64 4 +; CHECK-NEXT: store i32 
[[TMP10]], i32* [[S0_4]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[N]], 3 +; CHECK-NEXT: [[TMP12:%.*]] = sext i1 [[TMP11]] to i32 +; CHECK-NEXT: [[S0_5:%.*]] = getelementptr i32, i32* [[PCMP]], i64 5 +; CHECK-NEXT: store i32 [[TMP12]], i32* [[S0_5]], align 4 +; CHECK-NEXT: ret void +; + + %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0 + + %q0 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 0 + %q1 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 1 + %q2 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 2 + %q3 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 3 + %q4 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 4 + %q5 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 5 + + ; Fold strncmp(a, c, n) to N > 7 ? -1 : 0. + %c0_0 = call i32 @strncmp(i8* %p0, i8* %q0, i64 %n) + %s0_0 = getelementptr i32, i32* %pcmp, i64 0 + store i32 %c0_0, i32* %s0_0 + + ; Fold strncmp(a, c + 1, n) to N != 0 ? -1 : 0. + %c0_1 = call i32 @strncmp(i8* %p0, i8* %q1, i64 %n) + %s0_1 = getelementptr i32, i32* %pcmp, i64 1 + store i32 %c0_1, i32* %s0_1 + + ; Fold strncmp(a, c + 2, n) to N != 0 ? -1 : 0. + %c0_2 = call i32 @strncmp(i8* %p0, i8* %q2, i64 %n) + %s0_2 = getelementptr i32, i32* %pcmp, i64 2 + store i32 %c0_2, i32* %s0_2 + + ; Fold strncmp(a, c + 3, n) to N != 0 ? -1 : 0. + %c0_3 = call i32 @strncmp(i8* %p0, i8* %q3, i64 %n) + %s0_3 = getelementptr i32, i32* %pcmp, i64 3 + store i32 %c0_3, i32* %s0_3 + + ; Fold strncmp(a, c + 4, n) to N > 3 ? -1 : 0. + %c0_4 = call i32 @strncmp(i8* %p0, i8* %q4, i64 %n) + %s0_4 = getelementptr i32, i32* %pcmp, i64 4 + store i32 %c0_4, i32* %s0_4 + + ; Fold strncmp(a, c + 5, n) to N != 0 ? -1 : 0. + %c0_5 = call i32 @strncmp(i8* %p0, i8* %q4, i64 %n) + %s0_5 = getelementptr i32, i32* %pcmp, i64 5 + store i32 %c0_5, i32* %s0_5 + + ret void +} + + +; Exercise strncmp(A, D, N) folding of arrays of different sizes and +; a difference in the leading byte. 
+ +define void @fold_strncmp_a_d_n(i32* %pcmp, i64 %n) { +; CHECK-LABEL: @fold_strncmp_a_d_n( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[N:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 +; CHECK-NEXT: store i32 [[TMP4]], i32* [[S0_1]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[N]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[S1_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2 +; CHECK-NEXT: store i32 [[TMP6]], i32* [[S1_1]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[N]], 2 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[S2_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3 +; CHECK-NEXT: store i32 [[TMP8]], i32* [[S2_2]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i32 +; CHECK-NEXT: [[S4_4:%.*]] = getelementptr i32, i32* [[PCMP]], i64 4 +; CHECK-NEXT: store i32 [[TMP10]], i32* [[S4_4]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = sext i1 [[TMP11]] to i32 +; CHECK-NEXT: [[S4_4_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 5 +; CHECK-NEXT: store i32 [[TMP12]], i32* [[S4_4_2]], align 4 +; CHECK-NEXT: [[S5_5:%.*]] = getelementptr i32, i32* [[PCMP]], i64 6 +; CHECK-NEXT: store i32 0, i32* [[S5_5]], align 4 +; CHECK-NEXT: [[S6_6:%.*]] = getelementptr i32, i32* [[PCMP]], i64 7 +; CHECK-NEXT: store i32 0, i32* [[S6_6]], align 4 +; CHECK-NEXT: ret void +; + + %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0 + %p1 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 1 + %p2 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 2 + %p3 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 3 + %p4 = 
getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 4 + %p5 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 5 + %p6 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 6 + + %q0 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 0 + %q1 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 1 + %q2 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 2 + %q3 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 3 + %q4 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 4 + %q5 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 5 + %q6 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 6 + + ; Fold strncmp(a, d, n) to N != 0 ? -1 : 0. + %c0_0 = call i32 @strncmp(i8* %p0, i8* %q0, i64 %n) + %s0_0 = getelementptr i32, i32* %pcmp, i64 0 + store i32 %c0_0, i32* %s0_0 + + ; Fold strncmp(a, d + 1, n) to N != 0 ? -1 : 0. + %c0_1 = call i32 @strncmp(i8* %p0, i8* %q1, i64 %n) + %s0_1 = getelementptr i32, i32* %pcmp, i64 1 + store i32 %c0_1, i32* %s0_1 + + ; Fold strncmp(a + 1, d + 1, n) N > 3 ? +1 : 0. + %c1_1 = call i32 @strncmp(i8* %p1, i8* %q1, i64 %n) + %s1_1 = getelementptr i32, i32* %pcmp, i64 2 + store i32 %c1_1, i32* %s1_1 + + ; Fold strncmp(a + 2, d + 2, n) N > 2 ? +1 : 0. + %c2_2 = call i32 @strncmp(i8* %p2, i8* %q2, i64 %n) + %s2_2 = getelementptr i32, i32* %pcmp, i64 3 + store i32 %c2_2, i32* %s2_2 + + ; Fold strncmp(a + 3, d + 3, n) N > 1 ? +1 : 0. + %c3_3 = call i32 @strncmp(i8* %p3, i8* %q3, i64 %n) + %s3_3 = getelementptr i32, i32* %pcmp, i64 4 + store i32 %c3_3, i32* %s3_3 + + ; Fold strncmp(a + 4, d + 4, n) N != 0 ? +1 : 0. + %c4_4 = call i32 @strncmp(i8* %p4, i8* %q4, i64 %n) + %s4_4 = getelementptr i32, i32* %pcmp, i64 4 + store i32 %c4_4, i32* %s4_4 + + ; Fold strncmp(d + 4, a + 4, n) N != 0 ? -1 : 0 (same as above but + ; with the array arguments reversed). 
+ %c4_4_2 = call i32 @strncmp(i8* %q4, i8* %p4, i64 %n) + %s4_4_2 = getelementptr i32, i32* %pcmp, i64 5 + store i32 %c4_4_2, i32* %s4_4_2 + + ; Fold strncmp(a + 5, d + 5, n) to 0. + %c5_5 = call i32 @strncmp(i8* %p5, i8* %q5, i64 %n) + %s5_5 = getelementptr i32, i32* %pcmp, i64 6 + store i32 %c5_5, i32* %s5_5 + + ; Fold strncmp(a + 6, d + 6, n) to 0. + %c6_6 = call i32 @strncmp(i8* %p6, i8* %q6, i64 %n) + %s6_6 = getelementptr i32, i32* %pcmp, i64 7 + store i32 %c6_6, i32* %s6_6 + + ret void +} + + +; Exercise strncmp(A, D, N) folding of arrays with the same bytes and +; a nonzero size. + +define void @fold_strncmp_a_d_nz(i32* %pcmp, i64 %n) { +; CHECK-LABEL: @fold_strncmp_a_d_nz( +; CHECK-NEXT: store i32 -1, i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: ret void +; + + %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0 + %q0 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 0 + %nz = or i64 %n, 1 + + %c0_0 = call i32 @strncmp(i8* %p0, i8* %q0, i64 %nz) + %s0_0 = getelementptr i32, i32* %pcmp, i64 0 + store i32 %c0_0, i32* %s0_0 + + ret void +} + + +; Exercise strncmp(D, E, N) folding of equal strings but unequal arrays. 
+ +define void @fold_strncmp_d_e_n(i32* %pcmp, i64 %n) { +; CHECK-LABEL: @fold_strncmp_d_e_n( +; CHECK-NEXT: store i32 0, i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[N:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[S0_1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[S1_0:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2 +; CHECK-NEXT: store i32 [[TMP4]], i32* [[S1_0]], align 4 +; CHECK-NEXT: [[S1_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3 +; CHECK-NEXT: store i32 0, i32* [[S1_1]], align 4 +; CHECK-NEXT: ret void +; + + %p0 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 0 + %p1 = getelementptr [7 x i8], [7 x i8]* @d9123_12, i64 0, i64 1 + + %q0 = getelementptr [7 x i8], [7 x i8]* @e9123_34, i64 0, i64 0 + %q1 = getelementptr [7 x i8], [7 x i8]* @e9123_34, i64 0, i64 1 + + ; Fold to 0. + %c0_0 = call i32 @strncmp(i8* %p0, i8* %q0, i64 %n) + %s0_0 = getelementptr i32, i32* %pcmp, i64 0 + store i32 %c0_0, i32* %s0_0 + + ; Fold to N ? +1 : 0. + %c0_1 = call i32 @strncmp(i8* %p0, i8* %q1, i64 %n) + %s0_1 = getelementptr i32, i32* %pcmp, i64 1 + store i32 %c0_1, i32* %s0_1 + + ; Fold to N ? -1 : 0. + %c1_0 = call i32 @strncmp(i8* %p1, i8* %q0, i64 %n) + %s1_0 = getelementptr i32, i32* %pcmp, i64 2 + store i32 %c1_0, i32* %s1_0 + + ; Fold to 0. 
+ %c1_1 = call i32 @strncmp(i8* %p1, i8* %q1, i64 %n) + %s1_1 = getelementptr i32, i32* %pcmp, i64 3 + store i32 %c1_1, i32* %s1_1 + + ret void +} diff --git a/llvm/test/Transforms/InstCombine/strncmp-6.ll b/llvm/test/Transforms/InstCombine/strncmp-6.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/strncmp-6.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; +; Exercise folding of strncmp calls with constant arrays including both +; negative and positive characters and both constant and nonconstant sizes. + +declare i32 @strncmp(i8*, i8*, i64) + +@a = constant [7 x i8] c"abcdef\7f" +@b = constant [7 x i8] c"abcdef\80" + + +; Exercise strncmp(A + C, B + C, 2) folding of small arrays that differ in +; a character with the opposite sign and a constant size. + +define void @fold_strncmp_cst_cst(i32* %pcmp) { +; CHECK-LABEL: @fold_strncmp_cst_cst( +; CHECK-NEXT: store i32 -1, i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: [[SB5_A5:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 +; CHECK-NEXT: store i32 1, i32* [[SB5_A5]], align 4 +; CHECK-NEXT: [[SA6_B6:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2 +; CHECK-NEXT: store i32 -1, i32* [[SA6_B6]], align 4 +; CHECK-NEXT: [[SB6_A6:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3 +; CHECK-NEXT: store i32 1, i32* [[SB6_A6]], align 4 +; CHECK-NEXT: ret void +; + %p5 = getelementptr [7 x i8], [7 x i8]* @a, i64 0, i64 5 + %p6 = getelementptr [7 x i8], [7 x i8]* @a, i64 0, i64 6 + + %q5 = getelementptr [7 x i8], [7 x i8]* @b, i64 0, i64 5 + %q6 = getelementptr [7 x i8], [7 x i8]* @b, i64 0, i64 6 + + ; Fold strncmp(a + 5, b + 5, 2) to -1. + %ca5_b5 = call i32 @strncmp(i8* %p5, i8* %q5, i64 2) + %sa5_b5 = getelementptr i32, i32* %pcmp, i64 0 + store i32 %ca5_b5, i32* %sa5_b5 + + ; Fold strncmp(b + 5, a + 5, 2) to +1. 
+ %cb5_a5 = call i32 @strncmp(i8* %q5, i8* %p5, i64 2) + %sb5_a5 = getelementptr i32, i32* %pcmp, i64 1 + store i32 %cb5_a5, i32* %sb5_a5 + + ; Fold strncmp(a + 6, b + 6, 1) to -1. + %ca6_b6 = call i32 @strncmp(i8* %p6, i8* %q6, i64 1) + %sa6_b6 = getelementptr i32, i32* %pcmp, i64 2 + store i32 %ca6_b6, i32* %sa6_b6 + + ; Fold strncmp(b + 6, a + 6, 1) to +1. + %cb6_a6 = call i32 @strncmp(i8* %q6, i8* %p6, i64 1) + %sb6_a6 = getelementptr i32, i32* %pcmp, i64 3 + store i32 %cb6_a6, i32* %sb6_a6 + + ret void +} + + +; Exercise strncmp(A, B, N) folding of arrays that differ in a character +; with the opposite sign and a variable size + +define void @fold_strncmp_cst_var(i32* %pcmp, i64 %n) { +; CHECK-LABEL: @fold_strncmp_cst_var( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[N:%.*]], 6 +; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[N]], 6 +; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[SB0_A0:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 +; CHECK-NEXT: store i32 [[TMP4]], i32* [[SB0_A0]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[SA6_B6:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2 +; CHECK-NEXT: store i32 [[TMP6]], i32* [[SA6_B6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[N]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[SB6_A6:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3 +; CHECK-NEXT: store i32 [[TMP8]], i32* [[SB6_A6]], align 4 +; CHECK-NEXT: ret void +; + %p0 = getelementptr [7 x i8], [7 x i8]* @a, i64 0, i64 0 + %p6 = getelementptr [7 x i8], [7 x i8]* @a, i64 0, i64 6 + + %q0 = getelementptr [7 x i8], [7 x i8]* @b, i64 0, i64 0 + %q6 = getelementptr [7 x i8], [7 x i8]* @b, i64 0, i64 6 + + ; Fold strncmp(a, b, n) to -1. 
+ %ca0_b0 = call i32 @strncmp(i8* %p0, i8* %q0, i64 %n) + %sa0_b0 = getelementptr i32, i32* %pcmp, i64 0 + store i32 %ca0_b0, i32* %sa0_b0 + + ; Fold strncmp(b, a, n) to +1. + %cb0_a0 = call i32 @strncmp(i8* %q0, i8* %p0, i64 %n) + %sb0_a0 = getelementptr i32, i32* %pcmp, i64 1 + store i32 %cb0_a0, i32* %sb0_a0 + + ; Fold strncmp(a + 6, b + 6, n) to -1. + %ca6_b6 = call i32 @strncmp(i8* %p6, i8* %q6, i64 %n) + %sa6_b6 = getelementptr i32, i32* %pcmp, i64 2 + store i32 %ca6_b6, i32* %sa6_b6 + + ; Fold strncmp(b + 6, a + 6, n) to +1. + %cb6_a6 = call i32 @strncmp(i8* %q6, i8* %p6, i64 %n) + %sb6_a6 = getelementptr i32, i32* %pcmp, i64 3 + store i32 %cb6_a6, i32* %sb6_a6 + + ret void +}