Index: llvm/include/llvm/Analysis/ValueTracking.h
===================================================================
--- llvm/include/llvm/Analysis/ValueTracking.h
+++ llvm/include/llvm/Analysis/ValueTracking.h
@@ -124,6 +124,10 @@
                        const DominatorTree *DT = nullptr,
                        bool UseInstrInfo = true);
 
+/// Return true if the given instruction is only used in comparisons with zero.
+bool isOnlyUsedInZeroComparison(const Instruction *CxtI);
+
+/// Return true if the given instruction is only used in equality comparisons with zero.
 bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
 
 /// Return true if the given value is known to be non-zero when defined. For
Index: llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
===================================================================
--- llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -8,7 +8,7 @@
 /// \file
 ///
 /// AggressiveInstCombiner - Combine expression patterns to form expressions
-/// with fewer, simple instructions. This pass does not modify the CFG.
+/// with fewer, simpler instructions.
 ///
 //===----------------------------------------------------------------------===//
 
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -261,6 +261,13 @@
   return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
 }
 
+bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
+  return !I->user_empty() && all_of(I->users(), [](const User *U) {
+    ICmpInst::Predicate P;
+    return match(U, m_ICmp(P, m_Value(), m_Zero()));
+  });
+}
+
 bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
   return !I->user_empty() && all_of(I->users(), [](const User *U) {
     ICmpInst::Predicate P;
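[Editorial aside, not part of the patch] The new predicate accepts any icmp predicate against zero, whereas the existing isOnlyUsedInZeroEqualityComparison accepts only eq/ne. A minimal standalone sketch of the difference, assuming an LLVM development tree; the driver program, the function name @f, and the build setup (link against LLVMCore, LLVMAsmParser, and LLVMAnalysis) are illustrative, not from the patch:

```cpp
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::SMDiagnostic Err;
  // The strcmp result feeds an `icmp sgt` against zero: that is a zero
  // comparison, but not a zero *equality* comparison.
  std::unique_ptr<llvm::Module> M = llvm::parseAssemblyString(
      "declare i32 @strcmp(ptr, ptr)\n"
      "define i1 @f(ptr %a, ptr %b) {\n"
      "  %c = call i32 @strcmp(ptr %a, ptr %b)\n"
      "  %r = icmp sgt i32 %c, 0\n"
      "  ret i1 %r\n"
      "}\n",
      Err, Ctx);
  const llvm::Instruction &Call = M->getFunction("f")->front().front();
  llvm::outs() << llvm::isOnlyUsedInZeroComparison(&Call) << ' '
               << llvm::isOnlyUsedInZeroEqualityComparison(&Call) << '\n';
  // Expected output: "1 0".
  return 0;
}
```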
Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
===================================================================
--- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -28,6 +29,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"
 
@@ -396,54 +398,6 @@
   return true;
 }
 
-/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
-/// pessimistic codegen that has to account for setting errno and can enable
-/// vectorization.
-static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
-                     TargetLibraryInfo &TLI, AssumptionCache &AC,
-                     DominatorTree &DT) {
-  // Match a call to sqrt mathlib function.
-  auto *Call = dyn_cast<CallInst>(&I);
-  if (!Call)
-    return false;
-
-  Module *M = Call->getModule();
-  LibFunc Func;
-  if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
-    return false;
-
-  if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
-    return false;
-
-  // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
-  // (because NNAN or the operand arg must not be less than -0.0) and (2) we
-  // would not end up lowering to a libcall anyway (which could change the value
-  // of errno), then:
-  // (1) errno won't be set.
-  // (2) it is safe to convert this to an intrinsic call.
-  Type *Ty = Call->getType();
-  Value *Arg = Call->getArgOperand(0);
-  if (TTI.haveFastSqrt(Ty) &&
-      (Call->hasNoNaNs() ||
-       cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, &I,
-                                   &DT))) {
-    IRBuilder<> Builder(&I);
-    IRBuilderBase::FastMathFlagGuard Guard(Builder);
-    Builder.setFastMathFlags(Call->getFastMathFlags());
-
-    Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
-    Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
-    I.replaceAllUsesWith(NewSqrt);
-
-    // Explicitly erase the old call because a call with side effects is not
-    // trivially dead.
-    I.eraseFromParent();
-    return true;
-  }
-
-  return false;
-}
-
 // Check if this array of constants represents a cttz table.
 // Iterate over the elements from \p Table by trying to find/match all
 // the numbers from 0 to \p InputBits that should represent cttz results.
@@ -915,13 +869,159 @@
   return true;
 }
 
+/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
+/// pessimistic codegen that has to account for setting errno and can enable
+/// vectorization.
+static bool foldSqrt(CallInst *Call, TargetTransformInfo &TTI,
+                     TargetLibraryInfo &TLI, AssumptionCache &AC,
+                     DominatorTree &DT) {
+  Module *M = Call->getModule();
+
+  // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
+  // (because NNAN or the operand arg must not be less than -0.0), and (3) we
+  // would not end up lowering to a libcall anyway (which could change the value
+  // of errno), then:
+  // (1) errno won't be set.
+  // (2) it is safe to convert this to an intrinsic call.
+  Type *Ty = Call->getType();
+  Value *Arg = Call->getArgOperand(0);
+  if (TTI.haveFastSqrt(Ty) &&
+      (Call->hasNoNaNs() ||
+       cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, Call,
+                                   &DT))) {
+    IRBuilder<> Builder(Call);
+    IRBuilderBase::FastMathFlagGuard Guard(Builder);
+    Builder.setFastMathFlags(Call->getFastMathFlags());
+
+    Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
+    Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
+    Call->replaceAllUsesWith(NewSqrt);
+
+    // Explicitly erase the old call because a call with side effects is not
+    // trivially dead.
+    Call->eraseFromParent();
+    return true;
+  }
+
+  return false;
+}
+
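[Editorial aside, not part of the patch] For intuition about when this fold fires: sqrt can only raise a domain error, and hence touch errno, for inputs ordered less than -0.0, so the guard requires either an nnan call or an argument provably not less than -0.0. A hedged source-level illustration; the function name is mine:

```cpp
#include <cmath>

// Illustrative only: fabs() pins the argument at >= +0.0, which is the
// property cannotBeOrderedLessThanZero() establishes at the IR level, so
// this sqrt call qualifies for the intrinsic fold even without any
// fast-math flags on the call.
double scaled_magnitude(double X) {
  return std::sqrt(std::fabs(X)); // becomes @llvm.sqrt.f64 after the fold
}
```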
+/// Try to expand strcmp(P, "x") calls whose result is only compared with zero.
+static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
+  Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+
+  // Trivial cases are optimized during inst combine.
+  if (Str1P == Str2P)
+    return false;
+
+  StringRef Str1, Str2;
+  bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+  bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+  Value *NonConstantP = nullptr;
+  StringRef ConstantStr;
+
+  if (!HasStr1 && HasStr2 && Str2.size() == 1) {
+    NonConstantP = Str1P;
+    ConstantStr = Str2;
+  } else if (!HasStr2 && HasStr1 && Str1.size() == 1) {
+    NonConstantP = Str2P;
+    ConstantStr = Str1;
+  } else {
+    return false;
+  }
+
+  // Check that the strcmp result is only used in comparisons with zero.
+  if (!isOnlyUsedInZeroComparison(CI))
+    return false;
+
+  // For strcmp(P, "x") do the following transformation:
+  //
+  // (before)
+  // dst = strcmp(P, "x")
+  //
+  // (after)
+  // v0 = P[0] - 'x'
+  // [if v0 == 0]
+  //   v1 = P[1]
+  // dst = phi(v0, v1)
+  //
+
+  IRBuilder<> B(CI->getParent());
+  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+  Type *RetType = CI->getType();
+
+  B.SetInsertPoint(CI);
+  BasicBlock *InitialBB = B.GetInsertBlock();
+  Value *Str1FirstCharacterValue =
+      B.CreateZExt(B.CreateLoad(B.getInt8Ty(), NonConstantP), RetType);
+  Value *Str2FirstCharacterValue =
+      ConstantInt::get(RetType, static_cast<unsigned char>(ConstantStr[0]));
+  Value *FirstCharacterSub =
+      B.CreateNSWSub(Str1FirstCharacterValue, Str2FirstCharacterValue);
+  Value *IsFirstCharacterSubZero =
+      B.CreateICmpEQ(FirstCharacterSub, ConstantInt::get(RetType, 0));
+  Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen(
+      IsFirstCharacterSubZero, CI, /*Unreachable*/ false,
+      /*BranchWeights*/ nullptr, &DTU);
+
+  B.SetInsertPoint(IsFirstCharacterSubZeroBBTerminator);
+  B.GetInsertBlock()->setName("strcmp_expand_sub_is_zero");
+  BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock();
+  Value *Str1SecondCharacterValue = B.CreateZExt(
+      B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64(
+                                      B.getInt8Ty(), NonConstantP, 1)),
+      RetType);
+
+  B.SetInsertPoint(CI);
+  B.GetInsertBlock()->setName("strcmp_expand_sub_join");
+
+  PHINode *Result = B.CreatePHI(RetType, 2);
+  Result->addIncoming(FirstCharacterSub, InitialBB);
+  Result->addIncoming(Str1SecondCharacterValue, IsFirstCharacterSubZeroBB);
+
+  CI->replaceAllUsesWith(Result);
+  CI->eraseFromParent();
+
+  MadeCFGChange = true;
+
+  return true;
+}
+
+static bool foldLibraryCalls(Instruction &I, TargetTransformInfo &TTI,
+                             TargetLibraryInfo &TLI, DominatorTree &DT,
+                             AssumptionCache &AC, bool &MadeCFGChange) {
+  CallInst *CI = dyn_cast<CallInst>(&I);
+  if (!CI)
+    return false;
+
+  LibFunc Func;
+  Module *M = I.getModule();
+  if (!TLI.getLibFunc(*CI, Func) || !isLibFuncEmittable(M, &TLI, Func))
+    return false;
+
+  switch (Func) {
+  case LibFunc_sqrt:
+  case LibFunc_sqrtf:
+  case LibFunc_sqrtl:
+    return foldSqrt(CI, TTI, TLI, AC, DT);
+  case LibFunc_strcmp:
+    return expandStrcmp(CI, DT, MadeCFGChange);
+  default:
+    break;
+  }
+
+  return false;
+}
+
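[Editorial aside, not part of the patch] To make the (before)/(after) comment concrete, here is a hand-written C++ rendering of what the expansion computes for strcmp(P, "x"); the function name and signature are mine, and unsigned char matches the comparison semantics strcmp mandates. The result is sign- and zero-accurate, which is all the zero-comparison users can observe, and the second character is loaded only when the first one matches, mirroring the guarded block the pass creates:

```cpp
// Illustrative equivalent of the expanded IR for strcmp(P, "x"), assuming
// P points to a NUL-terminated string (a precondition of strcmp itself).
int strcmp_p_x(const unsigned char *P) {
  int V0 = int(P[0]) - int('x'); // v0 = P[0] - 'x'
  if (V0 != 0)
    return V0; // the sign alone decides <0 / ==0 / >0
  // First characters are equal, so strcmp next compares P[1] against the
  // constant string's NUL terminator; the result is simply P[1].
  return int(P[1]); // v1 = P[1]
}
```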
 /// This is the entry point for folds that could be implemented in regular
 /// InstCombine, but they are separated because they are not expected to
 /// occur frequently and/or have more than a constant-length pattern match.
 static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
                                 TargetTransformInfo &TTI,
                                 TargetLibraryInfo &TLI, AliasAnalysis &AA,
-                                AssumptionCache &AC) {
+                                AssumptionCache &AC, bool &MadeCFGChange) {
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     // Ignore unreachable basic blocks.
@@ -946,7 +1046,7 @@
       // NOTE: This function may erase the instruction `I`, so it needs to
       // be called at the end of this sequence; otherwise, we may introduce
       // bugs.
-      MadeChange |= foldSqrt(I, TTI, TLI, AC, DT);
+      MadeChange |= foldLibraryCalls(I, TTI, TLI, DT, AC, MadeCFGChange);
     }
   }
 
@@ -962,12 +1062,12 @@
 /// handled in the callers of this function.
 static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
                     TargetLibraryInfo &TLI, DominatorTree &DT,
-                    AliasAnalysis &AA) {
+                    AliasAnalysis &AA, bool &ChangedCFG) {
   bool MadeChange = false;
   const DataLayout &DL = F.getParent()->getDataLayout();
   TruncInstCombine TIC(AC, TLI, DL, DT);
   MadeChange |= TIC.run(F);
-  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC);
+  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, ChangedCFG);
   return MadeChange;
 }
 
@@ -978,12 +1078,21 @@
   auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
   auto &TTI = AM.getResult<TargetIRAnalysis>(F);
   auto &AA = AM.getResult<AAManager>(F);
-  if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
+
+  bool MadeCFGChange = false;
+
+  if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
   }
 
+  // Mark all the analyses that instcombine updates as preserved.
   PreservedAnalyses PA;
-  PA.preserveSet<CFGAnalyses>();
+
+  if (MadeCFGChange)
+    PA.preserve<DominatorTreeAnalysis>();
+  else
+    PA.preserveSet<CFGAnalyses>();
+
   return PA;
 }
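[Editorial aside, not part of the patch] Since expandStrcmp is the first fold in this pass that edits the CFG, a freestanding sketch of the SplitBlockAndInsertIfThen-plus-PHI shape it builds may help review. Everything below is my scaffolding (the module and function names, the use of verifyModule as the exit code); it assumes an LLVM development tree linking LLVMCore and LLVMTransformUtils:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("strcmp-expand-demo", Ctx);
  auto *FTy = FunctionType::get(Type::getInt32Ty(Ctx),
                                {PointerType::get(Ctx, 0)}, /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "demo", M);
  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(Entry);

  // Placeholder return; SplitBlockAndInsertIfThen splits right before it,
  // just as expandStrcmp splits before the strcmp call it replaces.
  ReturnInst *Ret = B.CreateRet(B.getInt32(0));
  B.SetInsertPoint(Ret);
  Value *C0 = B.CreateZExt(B.CreateLoad(B.getInt8Ty(), F->getArg(0)),
                           B.getInt32Ty());
  Value *Sub = B.CreateNSWSub(C0, B.getInt32('x'));
  Value *IsZero = B.CreateICmpEQ(Sub, B.getInt32(0));
  Instruction *ThenTerm =
      SplitBlockAndInsertIfThen(IsZero, Ret, /*Unreachable=*/false);

  // Guarded block: load the second character only when the first matched.
  B.SetInsertPoint(ThenTerm);
  Value *C1 = B.CreateZExt(
      B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64(
                                      B.getInt8Ty(), F->getArg(0), 1)),
      B.getInt32Ty());

  // Join block: merge the two results and return the PHI.
  B.SetInsertPoint(Ret);
  PHINode *Phi = B.CreatePHI(B.getInt32Ty(), 2);
  Phi->addIncoming(Sub, Entry);
  Phi->addIncoming(C1, ThenTerm->getParent());
  Ret->setOperand(0, Phi);

  M.print(outs(), nullptr);
  return verifyModule(M, &errs()); // nonzero exit if the IR is malformed
}
```

Printing the module shows the entry block ending in the conditional branch, the guarded block that loads the second character, and the join block whose PHI supplies the final value, mirroring the strcmp_expand_sub_is_zero / strcmp_expand_sub_join blocks in the tests below.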
Index: llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -227,21 +227,9 @@
   return ConstantInt::get(RetTy, Result);
 }
 
-static bool isOnlyUsedInComparisonWithZero(Value *V) {
-  for (User *U : V->users()) {
-    if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
-      if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
-        if (C->isNullValue())
-          continue;
-    // Unknown instruction.
-    return false;
-  }
-  return true;
-}
-
 static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
                                  const DataLayout &DL) {
-  if (!isOnlyUsedInComparisonWithZero(CI))
+  if (!isOnlyUsedInZeroComparison(CI))
     return false;
 
   if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
Index: llvm/test/Transforms/AggressiveInstCombine/strcmp.ll
===================================================================
--- llvm/test/Transforms/AggressiveInstCombine/strcmp.ll
+++ llvm/test/Transforms/AggressiveInstCombine/strcmp.ll
@@ -24,8 +24,19 @@
 
 define i1 @expand_strcmp_eq_s1(ptr %C) {
 ; CHECK-LABEL: @expand_strcmp_eq_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr noundef @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]]
+; CHECK: strcmp_expand_sub_is_zero:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]]
+; CHECK: strcmp_expand_sub_join:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP7]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP8]], 0
 ; CHECK-NEXT: ret i1 [[CMP]]
 ;
   %call = call i32 @strcmp(ptr %C, ptr noundef @s1)
@@ -35,8 +46,19 @@
 
 define i1 @expand_strcmp_eq_s1_commuted(ptr %C) {
 ; CHECK-LABEL: @expand_strcmp_eq_s1_commuted(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr noundef @s1, ptr [[C:%.*]])
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]]
+; CHECK: strcmp_expand_sub_is_zero:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]]
+; CHECK: strcmp_expand_sub_join:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP7]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP8]], 0
 ; CHECK-NEXT: ret i1 [[CMP]]
 ;
   %call = call i32 @strcmp(ptr noundef @s1, ptr %C)
@@ -46,8 +68,19 @@
 
 define i1 @expand_strcmp_ne_s1(ptr %C) {
 ; CHECK-LABEL: @expand_strcmp_ne_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr noundef @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]]
+; CHECK: strcmp_expand_sub_is_zero:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]]
+; CHECK: strcmp_expand_sub_join:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP7]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP8]], 0
 ; CHECK-NEXT: ret i1 [[CMP]]
 ;
   %call = call i32 @strcmp(ptr %C, ptr noundef @s1)
@@ -57,8 +90,19 @@
 
 define i1 @expand_strcmp_sgt_s1(ptr %C) {
 ; CHECK-LABEL: @expand_strcmp_sgt_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr noundef @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]]
+; CHECK: strcmp_expand_sub_is_zero:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]]
+; CHECK: strcmp_expand_sub_join:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP7]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 0
 ; CHECK-NEXT: ret i1 [[CMP]]
 ;
   %call = call i32 @strcmp(ptr %C, ptr noundef @s1)
@@ -68,8 +112,19 @@
 
 define i1 @expand_strcmp_sge_s1(ptr %C) {
 ; CHECK-LABEL: @expand_strcmp_sge_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr noundef @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CALL]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]]
+; CHECK: strcmp_expand_sub_is_zero:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]]
+; CHECK: strcmp_expand_sub_join:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP7]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[TMP8]], 0
 ; CHECK-NEXT: ret i1 [[CMP]]
 ;
   %call = call i32 @strcmp(ptr %C, ptr noundef @s1)
@@ -79,8 +134,19 @@
 
 define i1 @expand_strcmp_slt_s1(ptr %C) {
 ; CHECK-LABEL: @expand_strcmp_slt_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr noundef @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]]
+; CHECK: strcmp_expand_sub_is_zero:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]]
+; CHECK: strcmp_expand_sub_join:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP7]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], 0
 ; CHECK-NEXT: ret i1 [[CMP]]
 ;
   %call = call i32 @strcmp(ptr %C, ptr noundef @s1)
@@ -90,8 +156,19 @@
 
 define i1 @expand_strcmp_sle_s1(ptr %C) {
 ; CHECK-LABEL: @expand_strcmp_sle_s1(
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr noundef @s1)
-; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[CALL]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]]
+; CHECK: strcmp_expand_sub_is_zero:
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1
+; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32
+; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]]
+; CHECK: strcmp_expand_sub_join:
+; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP7]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], 0
 ; CHECK-NEXT: ret i1 [[CMP]]
 ;
   %call = call i32 @strcmp(ptr %C, ptr noundef @s1)