Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -535,6 +535,65 @@ if (uint64_t Len = GetStringLength(Src)) return ConstantInt::get(CI->getType(), Len - 1); + // If s is a constant pointer pointing to a string literal, we can fold + // strlen(s + x) to strlen(s) - x, when x is known to be in the range + // [0, strlen(s)] or the string has a single null terminator '\0' at the end. + if (GEPOperator *GEP = dyn_cast(Src)) { + if (GEP->getNumOperands() != 3) + return nullptr; + + // We only try to simplify strlen when the pointer s points to an array + // of i8. Otherwise, we would need to scale the offset x before doing the + // subtraction. This will make the optimization more complex, and it's not + // very useful because calling strlen for a pointer of other types is + // very uncommon. + PointerType *PT = cast(GEP->getOperand(0)->getType()); + ArrayType *AT = dyn_cast(PT->getElementType()); + if (!AT || !AT->getElementType()->isIntegerTy(8)) + return nullptr; + + // First index should be 0. + const ConstantInt *FirstIdx = dyn_cast(GEP->getOperand(1)); + if (!FirstIdx || !FirstIdx->isZero()) + return nullptr; + + StringRef Str; + size_t ArrSize = AT->getNumElements(); + if (getConstantStringInfo(GEP->getOperand(0), Str, 0, false)) { + size_t NullTermIdx = Str.find('\0'); + + // If the string does not have '\0', leave it to strlen to compute + // its length. + if (NullTermIdx == StringRef::npos) + return nullptr; + + Value *Offset = GEP->getOperand(2); + unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(Offset, KnownZero, KnownOne, DL, 0, nullptr, CI, + nullptr); + KnownZero.flipAllBits(); + // KnownZero's bits are flipped, so zeros in KnownZero now represent + // bits known to be zeros in Offset, and ones in KnowZero represent + // bits unknown in Offset. Therefore, Offset is known to be in range + // [0, NullTermIdx] when the flipped KnownZero is non-negative and + // unsigned-less-than NullTermIdx. + // + // If Offset is not provably in the range [0, NullTermIdx], we can still + // optimize if we can prove that the program has undefined behavior when + // Offset is outside that range. That is the case when GEP->getOperand(0) + // is a pointer to an object whose memory extent is NullTermIdx+1. + if ((KnownZero.isNonNegative() && KnownZero.ule(NullTermIdx)) || + (GEP->isInBounds() && isa(GEP->getOperand(0)) && + NullTermIdx == ArrSize - 1)) + return B.CreateSub(ConstantInt::get(CI->getType(), NullTermIdx), + Offset); + } + + return nullptr; + } + // strlen(x?"foo":"bars") --> x ? 3 : 4 if (SelectInst *SI = dyn_cast(Src)) { uint64_t LenTrue = GetStringLength(SI->getTrueValue()); Index: test/Transforms/InstCombine/strlen-1.ll =================================================================== --- test/Transforms/InstCombine/strlen-1.ll +++ test/Transforms/InstCombine/strlen-1.ll @@ -10,6 +10,7 @@ @null_hello = constant [7 x i8] c"\00hello\00" @nullstring = constant i8 0 @a = common global [32 x i8] zeroinitializer, align 1 +@null_hello_mid = constant [13 x i8] c"hello wor\00ld\00" declare i32 @strlen(i8*) @@ -97,6 +98,28 @@ ; CHECK-NEXT: ret } +; Check the case that should be simplified to a sub instruction. +; strlen(@hello + x) --> 5 - x + +define i32 @test_simplify10(i32 %x) { +; CHECK-LABEL: @test_simplify10( + %hello_p = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 %x + %hello_l = call i32 @strlen(i8* %hello_p) +; CHECK-NEXT: %1 = sub i32 5, %x + ret i32 %hello_l +} + +; strlen(@null_hello_mid + (x & 7)) --> 9 - (x & 7) +define i32 @test_simplify11(i32 %x) { +; CHECK-LABEL: @test_simplify11( + %and = and i32 %x, 7 + %hello_p = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 %and + %hello_l = call i32 @strlen(i8* %hello_p) +; CHECK-NEXT: and i32 +; CHECK-NEXT: %1 = sub nsw i32 9, %and + ret i32 %hello_l +} + ; Check cases that shouldn't be simplified. define i32 @test_no_simplify1() { @@ -107,3 +130,26 @@ ret i32 %a_l ; CHECK-NEXT: ret i32 %a_l } + +; strlen(@null_hello + x) should not be simpified to a sub instruction. + +define i32 @test_no_simplify2(i32 %x) { +; CHECK-LABEL: @test_no_simplify2( + %hello_p = getelementptr inbounds [7 x i8], [7 x i8]* @null_hello, i32 0, i32 %x + %hello_l = call i32 @strlen(i8* %hello_p) +; CHECK-NEXT: getelementptr +; CHECK-NEXT: call i32 @strlen + ret i32 %hello_l +} + +; strlen(@null_hello_mid + (x & 15)) should not be simplified to a sub instruction. +define i32 @test_no_simplify3(i32 %x) { +; CHECK-LABEL: @test_no_simplify3( + %and = and i32 %x, 15 + %hello_p = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 %and + %hello_l = call i32 @strlen(i8* %hello_p) +; CHECK-NEXT: and i32 +; CHECK-NEXT: getelementptr +; CHECK-NEXT: call i32 @strlen + ret i32 %hello_l +}