Index: llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp =================================================================== --- llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -906,7 +906,7 @@ return false; } -/// Try to expand strcmp(P, "x") calls. +/// Try to expand strcmp(P, string_literal) where string_literal size is 1 or 2 static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) { Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); @@ -921,16 +921,27 @@ Value *NonConstantP = nullptr; StringRef ConstantStr; - if (!HasStr1 && HasStr2 && Str2.size() == 1) { + if (!HasStr1 && HasStr2) { NonConstantP = Str1P; ConstantStr = Str2; - } else if (!HasStr2 && HasStr1 && Str1.size() == 1) { + } else if (!HasStr2 && HasStr1) { NonConstantP = Str2P; ConstantStr = Str1; } else { return false; } + size_t ConstantStrSize = ConstantStr.size(); + + // Trivial cases are optimized during inst combine + if (ConstantStrSize == 0) { + return false; + } + + if (ConstantStrSize > 2) { + return false; + } + // Check if strcmp result is only used in a comparison with zero if (!isOnlyUsedInZeroComparison(CI)) return false; @@ -946,42 +957,76 @@ // v1 = P[1] // dst = phi(v0, v1) // + // For strcmp(P, "xy") do the following transformation: + // + // (before) + // dst = strcmp(P, "x") + // + // (after) + // v0 = P[0] - 'x' + // [if v0 == 0] + // v1 = P[1] - 'y' + // [if v1 == 0] + // v2 = P[2] + // dst = phi(v0, v1, v2) + // IRBuilder<> B(CI->getParent()); DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); Type *RetType = CI->getType(); - B.SetInsertPoint(CI); - BasicBlock *InitialBB = B.GetInsertBlock(); - Value *Str1FirstCharacterValue = - B.CreateZExt(B.CreateLoad(B.getInt8Ty(), NonConstantP), RetType); - Value *Str2FirstCharacterValue = - ConstantInt::get(RetType, static_cast(ConstantStr[0])); - Value *FirstCharacterSub = - B.CreateNSWSub(Str1FirstCharacterValue, Str2FirstCharacterValue); - Value *IsFirstCharacterSubZero = - B.CreateICmpEQ(FirstCharacterSub, ConstantInt::get(RetType, 0)); - Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen( - IsFirstCharacterSubZero, CI, /*Unreachable*/ false, - /*BranchWeights*/ nullptr, &DTU); - - B.SetInsertPoint(IsFirstCharacterSubZeroBBTerminator); - B.GetInsertBlock()->setName("strcmp_expand_sub_is_zero"); - BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock(); - Value *Str1SecondCharacterValue = B.CreateZExt( - B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64( - B.getInt8Ty(), NonConstantP, 1)), - RetType); + BasicBlock *InitialBB = CI->getParent(); + BasicBlock *JoinBlock = SplitBlock(InitialBB, CI, &DTU); + JoinBlock->setName("strcmp_expand_sub_join"); B.SetInsertPoint(CI); - B.GetInsertBlock()->setName("strcmp_expand_sub_join"); + PHINode *ResultPHI = B.CreatePHI(RetType, ConstantStrSize + 1); + + B.SetInsertPoint(InitialBB); + InitialBB->getTerminator()->eraseFromParent(); + + SmallVector DTUpdates; + + size_t CharacterIndexToCheck = 0; + for (; CharacterIndexToCheck < ConstantStrSize; ++CharacterIndexToCheck) { + Value *StrCharacterValue = B.CreateZExt( + B.CreateLoad(B.getInt8Ty(), + B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), NonConstantP, + CharacterIndexToCheck)), + RetType); + Value *ConstantStrCharacterValue = ConstantInt::get( + RetType, + static_cast(ConstantStr[CharacterIndexToCheck])); + Value *CharacterSub = + B.CreateNSWSub(StrCharacterValue, ConstantStrCharacterValue); + Value *IsCharacterSubZero = + B.CreateICmpEQ(CharacterSub, ConstantInt::get(RetType, 0)); + BasicBlock *IsCharacterSubZeroBB = + BasicBlock::Create(B.getContext(), "strcmp_expand_sub_is_zero", + InitialBB->getParent(), JoinBlock); + B.CreateCondBr(IsCharacterSubZero, IsCharacterSubZeroBB, JoinBlock); + + ResultPHI->addIncoming(CharacterSub, B.GetInsertBlock()); + DTUpdates.emplace_back(DominatorTree::Insert, B.GetInsertBlock(), + IsCharacterSubZeroBB); + + B.SetInsertPoint(IsCharacterSubZeroBB); + DTUpdates.emplace_back(DominatorTree::Insert, IsCharacterSubZeroBB, + JoinBlock); + } + + Value *StrLastCharacterValue = B.CreateZExt( + B.CreateLoad(B.getInt8Ty(), + B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), NonConstantP, + CharacterIndexToCheck)), + RetType); + ResultPHI->addIncoming(StrLastCharacterValue, B.GetInsertBlock()); + B.CreateBr(JoinBlock); - PHINode *Result = B.CreatePHI(RetType, 2); - Result->addIncoming(FirstCharacterSub, InitialBB); - Result->addIncoming(Str1SecondCharacterValue, IsFirstCharacterSubZeroBB); + DTU.applyUpdates(DTUpdates); - CI->replaceAllUsesWith(Result); + CI->replaceAllUsesWith(ResultPHI); CI->eraseFromParent(); MadeCFGChange = true; Index: llvm/test/Transforms/AggressiveInstCombine/strcmp.ll =================================================================== --- llvm/test/Transforms/AggressiveInstCombine/strcmp.ll +++ llvm/test/Transforms/AggressiveInstCombine/strcmp.ll @@ -209,8 +209,26 @@ define i1 @expand_strcmp_eq_s2(ptr %C) { ; CHECK-LABEL: @expand_strcmp_eq_s2( -; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]] +; CHECK: strcmp_expand_sub_is_zero: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i32 [[TMP7]], 49 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[STRCMP_EXPAND_SUB_IS_ZERO1:%.*]], label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_is_zero1: +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_join: +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP8]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ], [ [[TMP12]], [[STRCMP_EXPAND_SUB_IS_ZERO1]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP13]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %call = call i32 @strcmp(ptr %C, ptr @s2) @@ -220,8 +238,26 @@ define i1 @expand_strcmp_ne_s2(ptr %C) { ; CHECK-LABEL: @expand_strcmp_ne_s2( -; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]] +; CHECK: strcmp_expand_sub_is_zero: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i32 [[TMP7]], 49 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[STRCMP_EXPAND_SUB_IS_ZERO1:%.*]], label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_is_zero1: +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_join: +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP8]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ], [ [[TMP12]], [[STRCMP_EXPAND_SUB_IS_ZERO1]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP13]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %call = call i32 @strcmp(ptr %C, ptr @s2) @@ -231,8 +267,26 @@ define i1 @expand_strcmp_sgt_s2(ptr %C) { ; CHECK-LABEL: @expand_strcmp_sgt_s2( -; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2) -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]] +; CHECK: strcmp_expand_sub_is_zero: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i32 [[TMP7]], 49 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[STRCMP_EXPAND_SUB_IS_ZERO1:%.*]], label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_is_zero1: +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_join: +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP8]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ], [ [[TMP12]], [[STRCMP_EXPAND_SUB_IS_ZERO1]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %call = call i32 @strcmp(ptr %C, ptr @s2) @@ -242,8 +296,26 @@ define i1 @expand_strcmp_sge_s2(ptr %C) { ; CHECK-LABEL: @expand_strcmp_sge_s2( -; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2) -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]] +; CHECK: strcmp_expand_sub_is_zero: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i32 [[TMP7]], 49 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[STRCMP_EXPAND_SUB_IS_ZERO1:%.*]], label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_is_zero1: +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_join: +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP8]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ], [ [[TMP12]], [[STRCMP_EXPAND_SUB_IS_ZERO1]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[TMP13]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %call = call i32 @strcmp(ptr %C, ptr @s2) @@ -253,8 +325,26 @@ define i1 @expand_strcmp_slt_s2(ptr %C) { ; CHECK-LABEL: @expand_strcmp_slt_s2( -; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2) -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]] +; CHECK: strcmp_expand_sub_is_zero: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i32 [[TMP7]], 49 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[STRCMP_EXPAND_SUB_IS_ZERO1:%.*]], label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_is_zero1: +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_join: +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP8]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ], [ [[TMP12]], [[STRCMP_EXPAND_SUB_IS_ZERO1]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP13]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %call = call i32 @strcmp(ptr %C, ptr @s2) @@ -264,8 +354,26 @@ define i1 @expand_strcmp_sle_s2(ptr %C) { ; CHECK-LABEL: @expand_strcmp_sle_s2( -; CHECK-NEXT: [[CALL:%.*]] = call i32 @strcmp(ptr [[C:%.*]], ptr @s2) -; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[CALL]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[C:%.*]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP2]], 48 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[STRCMP_EXPAND_SUB_IS_ZERO:%.*]], label [[STRCMP_EXPAND_SUB_JOIN:%.*]] +; CHECK: strcmp_expand_sub_is_zero: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = sub nsw i32 [[TMP7]], 49 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[STRCMP_EXPAND_SUB_IS_ZERO1:%.*]], label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_is_zero1: +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 +; CHECK-NEXT: br label [[STRCMP_EXPAND_SUB_JOIN]] +; CHECK: strcmp_expand_sub_join: +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[TMP8]], [[STRCMP_EXPAND_SUB_IS_ZERO]] ], [ [[TMP12]], [[STRCMP_EXPAND_SUB_IS_ZERO1]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %call = call i32 @strcmp(ptr %C, ptr @s2)