diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -48,6 +48,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include #include @@ -406,13 +407,6 @@ First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset; } - // Merges the given comparison blocks into one memcmp block and update - // branches. Comparisons are assumed to be continguous. If NextBBInChain is - // null, the merged block will link to the phi block. - void mergeComparisons(ArrayRef Comparisons, - BasicBlock *const NextBBInChain, PHINode &Phi, - const TargetLibraryInfo *const TLI, AliasAnalysis *AA); - PHINode &Phi_; std::vector Comparisons_; // The original entry block (before sorting); @@ -452,7 +446,7 @@ // chain before sorting. Unless we can abort the chain at this point // and start anew. // - // NOTE: we only handle block with single predecessor for now. + // NOTE: we only handle blocks with a single predecessor for now. if (Comparison.canSplit(AA)) { LLVM_DEBUG(dbgs() << "Split initial block '" << Comparison.BB->getName() @@ -540,8 +534,75 @@ } #endif // MERGEICMPS_DOT_ON +// Merges the given contiguous comparison blocks into one memcmp block. +static BasicBlock *mergeComparisons(ArrayRef Comparisons, + BasicBlock *const NextCmpBlock, + PHINode &Phi, + const TargetLibraryInfo *const TLI, + AliasAnalysis *AA) { + assert(!Comparisons.empty() && "merging zero comparisons"); + LLVMContext &Context = NextCmpBlock->getContext(); + BasicBlock *const PhiBB = Phi.getParent(); + const BCECmpBlock &FirstCmp = Comparisons[0]; + + // Create a new cmp block before next cmp block. 
+ BasicBlock *const BB = + BasicBlock::Create(Context, "", NextCmpBlock->getParent(), NextCmpBlock); + IRBuilder<> Builder(BB); + // Add the GEPs from the first BCECmpBlock. + Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone()); + Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone()); + + Value *IsEqual = nullptr; + if (Comparisons.size() == 1) { + LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); + Value *const LhsLoad = Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs); + Value *const RhsLoad = Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs); + // There are no blocks to merge, just do the comparison. + IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad); + } else { + LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n"); + + // If there is one block that requires splitting, we do it now, i.e. + // just before we know we will collapse the chain. The instructions + // can be executed before any of the instructions in the chain. + const auto ToSplit = std::find_if(Comparisons.begin(), Comparisons.end(), + [](const BCECmpBlock &B) { return B.RequireSplit; }); + if (ToSplit != Comparisons.end()) { + LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n"); + ToSplit->split(BB, AA); + } + + const unsigned TotalSizeBits = std::accumulate( + Comparisons.begin(), Comparisons.end(), 0u, + [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); }); + + // Create memcmp() == 0. + const auto &DL = Phi.getModule()->getDataLayout(); + Value *const MemCmpCall = emitMemCmp( + Lhs, Rhs, + ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder, + DL, TLI); + IsEqual = Builder.CreateICmpEQ( + MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0)); + } + + // Add a branch to the next basic block in the chain. + if (NextCmpBlock == PhiBB) { + // Continue to phi, passing it the comparison result. 
+ Builder.CreateBr(Phi.getParent()); + Phi.addIncoming(IsEqual, BB); + } else { + // Continue to next block if equal, exit to phi else. + Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB); + Phi.addIncoming(ConstantInt::getFalse(Context), BB); + } + return BB; +} + bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA) { + assert(Comparisons_.size() >= 2 && "simplifying trival BCECmpChain"); // First pass to check if there is at least one merge. If not, we don't do // anything and we keep analysis passes intact. { @@ -555,147 +616,41 @@ if (!AtLeastOneMerged) return false; } - // Remove phi references to comparison blocks, they will be rebuilt as we - // merge the blocks. - for (const auto &Comparison : Comparisons_) { - Phi_.removeIncomingValue(Comparison.BB, false); - } - - // If entry block is part of the chain, we need to make the first block - // of the chain the new entry block of the function. - BasicBlock *Entry = &Comparisons_[0].BB->getParent()->getEntryBlock(); - for (size_t I = 1; I < Comparisons_.size(); ++I) { - if (Entry == Comparisons_[I].BB) { - BasicBlock *NEntryBB = BasicBlock::Create(Entry->getContext(), "", - Entry->getParent(), Entry); - BranchInst::Create(Entry, NEntryBB); - break; - } - } - - // Point the predecessors of the chain to the first comparison block (which is - // the new entry point) and update the entry block of the chain. - if (EntryBlock_ != Comparisons_[0].BB) { - EntryBlock_->replaceAllUsesWith(Comparisons_[0].BB); - EntryBlock_ = Comparisons_[0].BB; - } - - // Effectively merge blocks. + // Effectively merge blocks. We go in the reverse direction from the phi block + // so that the next block is always available to branch to. 
int NumMerged = 1; - for (size_t I = 1; I < Comparisons_.size(); ++I) { - if (IsContiguous(Comparisons_[I - 1], Comparisons_[I])) { + BasicBlock *NextCmpBlock = Phi_.getParent(); + for (int I = static_cast(Comparisons_.size()) - 2; I >= 0; --I) { + if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) { ++NumMerged; } else { - // Merge all previous comparisons and start a new merge block. - mergeComparisons( - makeArrayRef(Comparisons_).slice(I - NumMerged, NumMerged), - Comparisons_[I].BB, Phi_, TLI, AA); + NextCmpBlock = + mergeComparisons(makeArrayRef(Comparisons_).slice(I + 1, NumMerged), + NextCmpBlock, Phi_, TLI, AA); NumMerged = 1; } } - mergeComparisons(makeArrayRef(Comparisons_) - .slice(Comparisons_.size() - NumMerged, NumMerged), - nullptr, Phi_, TLI, AA); - - return true; -} - -void BCECmpChain::mergeComparisons(ArrayRef Comparisons, - BasicBlock *const NextBBInChain, - PHINode &Phi, - const TargetLibraryInfo *const TLI, - AliasAnalysis *AA) { - assert(!Comparisons.empty()); - const auto &FirstComparison = *Comparisons.begin(); - BasicBlock *const BB = FirstComparison.BB; - LLVMContext &Context = BB->getContext(); - - if (Comparisons.size() >= 2) { - // If there is one block that requires splitting, we do it now, i.e. - // just before we know we will collapse the chain. The instructions - // can be executed before any of the instructions in the chain. 
- auto C = std::find_if(Comparisons.begin(), Comparisons.end(), - [](const BCECmpBlock &B) { return B.RequireSplit; }); - if (C != Comparisons.end()) - C->split(EntryBlock_, AA); - - LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons\n"); - const auto TotalSize = - std::accumulate(Comparisons.begin(), Comparisons.end(), 0, - [](int Size, const BCECmpBlock &C) { - return Size + C.SizeBits(); - }) / - 8; - - // Incoming edges do not need to be updated, and both GEPs are already - // computing the right address, we just need to: - // - replace the two loads and the icmp with the memcmp - // - update the branch - // - update the incoming values in the phi. - FirstComparison.BranchI->eraseFromParent(); - FirstComparison.CmpI->eraseFromParent(); - FirstComparison.Lhs().LoadI->eraseFromParent(); - FirstComparison.Rhs().LoadI->eraseFromParent(); - - IRBuilder<> Builder(BB); - const auto &DL = Phi.getModule()->getDataLayout(); - Value *const MemCmpCall = emitMemCmp( - FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP, - ConstantInt::get(DL.getIntPtrType(Context), TotalSize), - Builder, DL, TLI); - Value *const MemCmpIsZero = Builder.CreateICmpEQ( - MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0)); + NextCmpBlock = + mergeComparisons(makeArrayRef(Comparisons_).slice(0, NumMerged), + NextCmpBlock, Phi_, TLI, AA); - // Add a branch to the next basic block in the chain. - if (NextBBInChain) { - Builder.CreateCondBr(MemCmpIsZero, NextBBInChain, Phi.getParent()); - Phi.addIncoming(ConstantInt::getFalse(Context), BB); - } else { - Builder.CreateBr(Phi.getParent()); - Phi.addIncoming(MemCmpIsZero, BB); - } + // Replace the original cmp chain with the new cmp chain by pointing all + // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp + // blocks in the old chain unreachable. 
+ for (BasicBlock* Pred : predecessors(EntryBlock_)) { + Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock); + } + EntryBlock_ = nullptr; - // Delete merged blocks. - for (size_t I = 1; I < Comparisons.size(); ++I) { - BasicBlock *CBB = Comparisons[I].BB; - CBB->replaceAllUsesWith(BB); - CBB->eraseFromParent(); - } - } else { - assert(Comparisons.size() == 1); - // There are no blocks to merge, but we still need to update the branches. - LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); - if (NextBBInChain) { - if (FirstComparison.BranchI->isConditional()) { - LLVM_DEBUG(dbgs() << "conditional -> conditional\n"); - // Just update the "true" target, the "false" target should already be - // the phi block. - assert(FirstComparison.BranchI->getSuccessor(1) == Phi.getParent()); - FirstComparison.BranchI->setSuccessor(0, NextBBInChain); - Phi.addIncoming(ConstantInt::getFalse(Context), BB); - } else { - LLVM_DEBUG(dbgs() << "unconditional -> conditional\n"); - // Replace the unconditional branch by a conditional one. - FirstComparison.BranchI->eraseFromParent(); - IRBuilder<> Builder(BB); - Builder.CreateCondBr(FirstComparison.CmpI, NextBBInChain, - Phi.getParent()); - Phi.addIncoming(FirstComparison.CmpI, BB); - } - } else { - if (FirstComparison.BranchI->isConditional()) { - LLVM_DEBUG(dbgs() << "conditional -> unconditional\n"); - // Replace the conditional branch by an unconditional one. - FirstComparison.BranchI->eraseFromParent(); - IRBuilder<> Builder(BB); - Builder.CreateBr(Phi.getParent()); - Phi.addIncoming(FirstComparison.CmpI, BB); - } else { - LLVM_DEBUG(dbgs() << "unconditional -> unconditional\n"); - Phi.addIncoming(FirstComparison.CmpI, BB); - } - } + // Delete merged blocks. This also removes incoming values in phi. 
+ SmallVector DeadBlocks; + for (auto &Cmp : Comparisons_) { + DeadBlocks.push_back(Cmp.BB); } + DeleteDeadBlocks(DeadBlocks); + + Comparisons_.clear(); + return true; } std::vector getOrderedBlocks(PHINode &Phi, diff --git a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll --- a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll +++ b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll @@ -1,33 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -S | FileCheck %s --check-prefix=X86 -%"struct.std::pair" = type { i32, i32, i32, i32 } +%S = type { i32, i32, i32, i32 } +define zeroext i1 @opeq1( ; X86-LABEL: @opeq1( -; X86-NEXT: entry: ; X86-NEXT: [[PTR:%.*]] = alloca i32 ; X86-NEXT: store i32 42, i32* [[PTR]] -; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 -; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8* +; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP1]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP2]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16) -; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: [[TMP3:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] ; X86: opeq1.exit: -; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ] -; X86-NEXT: ret i1 [[TMP1]] - -define zeroext i1 @opeq1( - - %"struct.std::pair"* nocapture readonly dereferenceable(16) %a, - 
%"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { +; X86-NEXT: ret i1 [[TMP3]] +; + %S* nocapture readonly dereferenceable(16) %a, + %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { entry: %ptr = alloca i32 - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %1 = load i32, i32* %first1.i, align 4 ; Does other work, has no interference, merge block store i32 42, i32* %ptr @@ -35,25 +32,25 @@ br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %3 = load i32, i32* %second2.i, align 4 %cmp2.i = icmp eq i32 %2, %3 br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit land.rhs.i.2: - %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2 + %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2 %4 = load i32, i32* %third.i, align 4 - %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 2 + %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2 %5 = load i32, i32* %third2.i, align 4 %cmp3.i = icmp eq i32 %4, %5 br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit land.rhs.i.3: - %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3 + %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3 %6 = load i32, i32* %fourth.i, align 4 - 
%fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3 + %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3 %7 = load i32, i32* %fourth2.i, align 4 %cmp4.i = icmp eq i32 %6, %7 br label %opeq1.exit diff --git a/llvm/test/Transforms/MergeICmps/X86/atomic.ll b/llvm/test/Transforms/MergeICmps/X86/atomic.ll --- a/llvm/test/Transforms/MergeICmps/X86/atomic.ll +++ b/llvm/test/Transforms/MergeICmps/X86/atomic.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s -%"struct.std::pair" = type { i32, i32 } +%S = type { i32, i32 } define zeroext i1 @opeq( ; CHECK-LABEL: @opeq( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 -; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 ; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] ; CHECK: land.rhs.i: -; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1 +; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, i32* [[SECOND_I]] seq_cst, align 4 -; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1 +; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1 ; CHECK-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 ; CHECK-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] ; CHECK-NEXT: br label [[OPEQ1_EXIT]] @@ -23,20 +23,20 @@ ; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] ; CHECK-NEXT: ret i1 [[TMP4]] ; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(8) %a, + %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %1 = load i32, i32* %first1.i, align 4 %cmp.i = icmp eq i32 %0, %1 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %2 = load atomic i32, i32* %second.i seq_cst, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %3 = load i32, i32* %second2.i, align 4 %cmp3.i = icmp eq i32 %2, %3 br label %opeq1.exit diff --git a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll --- a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll +++ b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled.ll @@ -1,51 +1,69 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s -%"struct.std::pair" = type { i32, i32, 
i32, i32 } +%S = type { i32, i32, i32, i32 } -; The entry block is part of the chain. It however can not be merged. We need to make the -; first comparison block in the chain the new entry block of the function. +; The entry block is part of the chain. It however can not be merged. We need to +; make sure that the control flow is still consistent (goes through each of the +; blocks). define zeroext i1 @opeq1( ; CHECK-LABEL: @opeq1( -; CHECK-NEXT: br label [[LAND_RHS_I:%.*]] -; CHECK: land.rhs.i: -; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 -; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 -; CHECK-NEXT: [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8* -; CHECK-NEXT: [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP1]] to i8* +; CHECK-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP2]] to i8* ; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[OPEQ1_EXIT:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[OPEQ1_EXIT]] +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3 +; CHECK-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: br label [[OPEQ1_EXIT]] +; CHECK: opeq1.exit: +; CHECK-NEXT: [[TMP16:%.*]] = phi i1 [ [[TMP15]], [[TMP10]] ], [ false, [[TMP4]] ], [ false, [[TMP0:%.*]] ] +; CHECK-NEXT: ret i1 [[TMP16]] ; - %"struct.std::pair"* nocapture readonly dereferenceable(16) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(16) %a, + %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 2 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2 %1 = load i32, i32* %first1.i, align 4 %cmp.i = icmp eq i32 %0, %1 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %3 = load i32, i32* %second2.i, align 4 %cmp3.i = icmp eq i32 %2, %3 br i1 %cmp3.i, label %land.rhs.i.2, label %opeq1.exit land.rhs.i.2: - %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %4 = load i32, i32* %third.i, align 4 - %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %5 = load i32, i32* 
%third2.i, align 4 %cmp4.i = icmp eq i32 %4, %5 br i1 %cmp4.i, label %land.rhs.i.3, label %opeq1.exit land.rhs.i.3: - %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3 + %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3 %6 = load i32, i32* %fourth.i, align 4 - %fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3 + %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3 %7 = load i32, i32* %fourth2.i, align 4 %cmp5.i = icmp eq i32 %6, %7 br label %opeq1.exit diff --git a/llvm/test/Transforms/MergeICmps/X86/gep-used-outside.ll b/llvm/test/Transforms/MergeICmps/X86/gep-used-outside.ll --- a/llvm/test/Transforms/MergeICmps/X86/gep-used-outside.ll +++ b/llvm/test/Transforms/MergeICmps/X86/gep-used-outside.ll @@ -1,29 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s -%"struct.std::pair" = type { i32, i32 } +%S = type { i32, i32 } ; Check that the transformation is avoided when GEP has a use outside of the ; parant block of the load instruction. 
define zeroext i32 @opeq1( ; CHECK-LABEL: @opeq1( -; CHECK-NOT: [[MEMCMP:%.*]] = call i32 @memcmp +; CHECK-NEXT: entry: +; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 +; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] +; CHECK: land.rhs.i: +; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4 +; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 +; CHECK-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br label [[OPEQ1_EXIT]] +; CHECK: opeq1.exit: +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[FIRST_I]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP4]], i32 [[TMP5]], i32 0 +; CHECK-NEXT: ret i32 [[TMP6]] +; - %"struct.std::pair"* nocapture readonly dereferenceable(16) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(16) %a, + %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %1 = load i32, i32* %first1.i, align 4 %cmp.i = icmp 
eq i32 %0, %1 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %3 = load i32, i32* %second2.i, align 4 %cmp3.i = icmp eq i32 %2, %3 br label %opeq1.exit diff --git a/llvm/test/Transforms/MergeICmps/X86/int64-and-ptr.ll b/llvm/test/Transforms/MergeICmps/X86/int64-and-ptr.ll --- a/llvm/test/Transforms/MergeICmps/X86/int64-and-ptr.ll +++ b/llvm/test/Transforms/MergeICmps/X86/int64-and-ptr.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mergeicmps -S | FileCheck %s --check-prefix=X86 ; 8-byte int and 8-byte pointer should merge into a 16-byte memcmp. diff --git a/llvm/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll b/llvm/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll --- a/llvm/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll +++ b/llvm/test/Transforms/MergeICmps/X86/last-block-produce-no-value.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s -%"struct.std::pair" = type { i32, i32, i32 } +%S = type { i32, i32, i32 } ; Last block does not produce the non-constant value into the phi. 
; We could handle this case, but an easier way would be to allow other transformations such as @@ -11,39 +11,39 @@ define zeroext i1 @opeq1( ; CHECK-LABEL: @opeq1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 -; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 ; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] ; CHECK: land.rhs.i: -; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1 +; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4 -; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1 +; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 ; CHECK-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br i1 [[CMP3_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] +; CHECK-NEXT: br i1 [[CMP3_I]], label [[LAND_RHS_I_2:%.*]], label [[OPEQ1_EXIT]] ; CHECK: land.rhs.i.2: ; CHECK-NEXT: br label [[OPEQ1_EXIT]] ; CHECK: opeq1.exit: -; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ [[CMP3_I]], [[LAND_RHS_I_2]] ] ; 
CHECK-NEXT: ret i1 [[TMP4]] ; - %"struct.std::pair"* nocapture readonly dereferenceable(12) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(12) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(12) %a, + %S* nocapture readonly dereferenceable(12) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %1 = load i32, i32* %first1.i, align 4 %cmp.i = icmp eq i32 %0, %1 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %3 = load i32, i32* %second2.i, align 4 %cmp3.i = icmp eq i32 %2, %3 br i1 %cmp3.i, label %land.rhs.i.2, label %opeq1.exit diff --git a/llvm/test/Transforms/MergeICmps/X86/lit.local.cfg b/llvm/test/Transforms/MergeICmps/X86/lit.local.cfg --- a/llvm/test/Transforms/MergeICmps/X86/lit.local.cfg +++ b/llvm/test/Transforms/MergeICmps/X86/lit.local.cfg @@ -1,3 +1,2 @@ if not 'X86' in config.root.targets: config.unsupported = True - diff --git a/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll b/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll --- a/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll +++ b/llvm/test/Transforms/MergeICmps/X86/multiple-blocks-does-work.ll @@ -1,31 +1,47 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mergeicmps 
-mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 -%"struct.std::pair" = type { i32, i32, i32, i32 } +%S = type { i32, i32, i32, i32 } declare void @foo(...) ; We can discard %entry and %land.rhs.i, but still merge the last 2 blocks. define zeroext i1 @opeq1( ; X86-LABEL: @opeq1( -; X86: land.rhs.i.2: -; X86-NEXT: [[THIRD_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 2 -; X86-NEXT: [[THIRD1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 2 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[THIRD_I]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[THIRD1_I]] to i8* +; X86-NEXT: entry: +; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 +; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 +; X86-NEXT: call void (...) @foo() +; X86-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] +; X86-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] +; X86: land.rhs.i: +; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1 +; X86-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4 +; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1 +; X86-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 +; X86-NEXT: call void (...) 
@foo() +; X86-NEXT: [[CMP2_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; X86-NEXT: br i1 [[CMP2_I]], label [[TMP4:%.*]], label [[OPEQ1_EXIT]] +; X86: 4: +; X86-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2 +; X86-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 2 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP5]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP6]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) -; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] +; X86-NEXT: [[TMP7:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: br label [[OPEQ1_EXIT]] ; X86: opeq1.exit: -; X86-NEXT: [[TMP1:%.*]] = phi i1 [ false, %entry ], [ false, %land.rhs.i ], [ [[TMP0]], %land.rhs.i.2 ] -; X86-NEXT: ret i1 [[TMP1]] +; X86-NEXT: [[TMP8:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ [[TMP7]], [[TMP4]] ] +; X86-NEXT: ret i1 [[TMP8]] ; - %"struct.std::pair"* nocapture readonly dereferenceable(16) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(16) %a, + %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %1 = load i32, i32* %first1.i, align 4 ; Does other work. call void (...) 
@foo() @@ -33,9 +49,9 @@ br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %3 = load i32, i32* %second2.i, align 4 ; Does other work. call void (...) @foo() @@ -43,17 +59,17 @@ br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit land.rhs.i.2: - %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2 + %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2 %4 = load i32, i32* %third.i, align 4 - %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 2 + %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2 %5 = load i32, i32* %third2.i, align 4 %cmp3.i = icmp eq i32 %4, %5 br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit land.rhs.i.3: - %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3 + %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3 %6 = load i32, i32* %fourth.i, align 4 - %fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3 + %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3 %7 = load i32, i32* %fourth2.i, align 4 %cmp4.i = icmp eq i32 %6, %7 br label %opeq1.exit diff --git a/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll b/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll --- a/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll +++ b/llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll @@ -2,34 +2,32 @@ ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S 
-disable-simplify-libcalls | FileCheck %s --check-prefix=X86-NOBUILTIN -%"struct.std::pair" = type { i32, i32 } +%S = type { i32, i32 } define zeroext i1 @opeq1( ; X86-LABEL: @opeq1( -; X86-NEXT: entry: -; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 -; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[FIRST_I]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[FIRST1_I]] to i8* +; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP1]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP2]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) -; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: [[TMP3:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] ; X86: opeq1.exit: -; X86-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[ENTRY:%.*]] ] -; X86-NEXT: ret i1 [[TMP1]] +; X86-NEXT: ret i1 [[TMP3]] ; ; X86-NOBUILTIN-LABEL: @opeq1( ; X86-NOBUILTIN-NEXT: entry: -; X86-NOBUILTIN-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 +; X86-NOBUILTIN-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 ; X86-NOBUILTIN-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 -; X86-NOBUILTIN-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 +; X86-NOBUILTIN-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 ; X86-NOBUILTIN-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 ; X86-NOBUILTIN-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] ; X86-NOBUILTIN-NEXT: br i1 
[[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] ; X86-NOBUILTIN: land.rhs.i: -; X86-NOBUILTIN-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1 +; X86-NOBUILTIN-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1 ; X86-NOBUILTIN-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4 -; X86-NOBUILTIN-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1 +; X86-NOBUILTIN-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1 ; X86-NOBUILTIN-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 ; X86-NOBUILTIN-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] ; X86-NOBUILTIN-NEXT: br label [[OPEQ1_EXIT]] @@ -37,20 +35,20 @@ ; X86-NOBUILTIN-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] ; X86-NOBUILTIN-NEXT: ret i1 [[TMP4]] ; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(8) %a, + %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %1 = load i32, i32* %first1.i, align 4 %cmp.i = icmp eq i32 %0, %1 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", 
%"struct.std::pair"* %b, i64 0, i32 1 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %3 = load i32, i32* %second2.i, align 4 %cmp3.i = icmp eq i32 %2, %3 br label %opeq1.exit @@ -67,31 +65,28 @@ ; Same as above, but the two blocks are in inverse order. define zeroext i1 @opeq1_inverse( ; X86-LABEL: @opeq1_inverse( -; X86-NEXT: br label [[LAND_RHS_I:%.*]] -; X86: land.rhs.i: -; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 -; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[SECOND_I]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[SECOND2_I]] to i8* +; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP1]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP2]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) -; X86-NEXT: [[TMP1:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: [[TMP3:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] ; X86: opeq1.exit: -; X86-NEXT: [[TMP2:%.*]] = phi i1 [ [[TMP1]], [[LAND_RHS_I]] ] -; X86-NEXT: ret i1 [[TMP2]] +; X86-NEXT: ret i1 [[TMP3]] ; ; X86-NOBUILTIN-LABEL: @opeq1_inverse( ; X86-NOBUILTIN-NEXT: entry: -; X86-NOBUILTIN-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 1 +; X86-NOBUILTIN-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 1 ; X86-NOBUILTIN-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 -; X86-NOBUILTIN-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 1 +; X86-NOBUILTIN-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds 
[[S]], %S* [[B:%.*]], i64 0, i32 1 ; X86-NOBUILTIN-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 ; X86-NOBUILTIN-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] ; X86-NOBUILTIN-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] ; X86-NOBUILTIN: land.rhs.i: -; X86-NOBUILTIN-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 0 +; X86-NOBUILTIN-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 0 ; X86-NOBUILTIN-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4 -; X86-NOBUILTIN-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 0 +; X86-NOBUILTIN-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 0 ; X86-NOBUILTIN-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 ; X86-NOBUILTIN-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] ; X86-NOBUILTIN-NEXT: br label [[OPEQ1_EXIT]] @@ -99,20 +94,20 @@ ; X86-NOBUILTIN-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] ; X86-NOBUILTIN-NEXT: ret i1 [[TMP4]] ; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(8) %a, + %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %1 = load i32, i32* %first1.i, align 4 %cmp.i = icmp eq i32 %0, %1 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", 
%"struct.std::pair"* %a, i64 0, i32 0 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %3 = load i32, i32* %second2.i, align 4 %cmp3.i = icmp eq i32 %2, %3 br label %opeq1.exit @@ -126,6 +121,3 @@ ; The branch is now a direct branch; the other block has been removed. ; The phi is updated. } - - - diff --git a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll --- a/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll +++ b/llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll @@ -1,31 +1,31 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 -%"struct.std::pair" = type { i32, i32, i32, i32 } +%S = type { i32, i32, i32, i32 } declare void @foo(...) nounwind readnone ; We can split %entry and create a memcmp(16 bytes). define zeroext i1 @opeq1( ; X86-LABEL: @opeq1( -; -; Make sure this call is moved to the beginning of the entry block. -; X86: entry: ; X86-NEXT: call void (...) 
@foo() -; X86-NEXT: [[THIRD_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 -; X86-NEXT: [[THIRD1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 -; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[THIRD_I]] to i8* -; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[THIRD1_I]] to i8* +; X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP1]] to i8* +; X86-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP2]] to i8* ; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 16) -; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: [[TMP3:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] +; X86: opeq1.exit: +; X86-NEXT: ret i1 [[TMP3]] ; - %"struct.std::pair"* nocapture readonly dereferenceable(16) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { +; Make sure this call is moved to the beginning of the entry block. + %S* nocapture readonly dereferenceable(16) %a, + %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %1 = load i32, i32* %first1.i, align 4 ; Does other work. call void (...) 
@foo() @@ -33,25 +33,25 @@ br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %3 = load i32, i32* %second2.i, align 4 %cmp2.i = icmp eq i32 %2, %3 br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit land.rhs.i.2: - %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2 + %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2 %4 = load i32, i32* %third.i, align 4 - %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 2 + %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 2 %5 = load i32, i32* %third2.i, align 4 %cmp3.i = icmp eq i32 %4, %5 br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit land.rhs.i.3: - %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 3 + %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 3 %6 = load i32, i32* %fourth.i, align 4 - %fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3 + %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3 %7 = load i32, i32* %fourth2.i, align 4 %cmp4.i = icmp eq i32 %6, %7 br label %opeq1.exit @@ -65,18 +65,46 @@ ; We will not be able to merge anything, make sure the call is not moved out. 
define zeroext i1 @opeq1_discontiguous( ; X86-LABEL: @opeq1_discontiguous( +; X86-NEXT: entry: +; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 1 +; X86-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 +; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 +; X86-NEXT: call void (...) @foo() +; X86-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] +; X86-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] +; X86: land.rhs.i: +; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2 +; X86-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4 +; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1 +; X86-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 +; X86-NEXT: [[CMP2_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; X86-NEXT: br i1 [[CMP2_I]], label [[LAND_RHS_I_2:%.*]], label [[OPEQ1_EXIT]] +; X86: land.rhs.i.2: +; X86-NEXT: [[THIRD_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 2 +; X86-NEXT: [[TMP4:%.*]] = load i32, i32* [[THIRD_I]], align 4 +; X86-NEXT: [[THIRD2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3 +; X86-NEXT: [[TMP5:%.*]] = load i32, i32* [[THIRD2_I]], align 4 +; X86-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] +; X86-NEXT: br i1 [[CMP3_I]], label [[LAND_RHS_I_3:%.*]], label [[OPEQ1_EXIT]] +; X86: land.rhs.i.3: +; X86-NEXT: [[FOURTH_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1 +; X86-NEXT: [[TMP6:%.*]] = load i32, i32* [[FOURTH_I]], align 4 +; X86-NEXT: [[FOURTH2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 3 +; X86-NEXT: [[TMP7:%.*]] = load i32, i32* [[FOURTH2_I]], align 4 +; X86-NEXT: [[CMP4_I:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]] +; X86-NEXT: br label [[OPEQ1_EXIT]] +; X86: opeq1.exit: +; X86-NEXT: [[TMP8:%.*]] = phi i1 [ false, 
[[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ false, [[LAND_RHS_I_2]] ], [ [[CMP4_I]], [[LAND_RHS_I_3]] ] +; X86-NEXT: ret i1 [[TMP8]] ; ; Make sure this call is moved in the entry block. -; X86: entry: -; X86: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 1 -; X86: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 -; X86: call void (...) @foo() - %"struct.std::pair"* nocapture readonly dereferenceable(16) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(16) %a, + %S* nocapture readonly dereferenceable(16) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %1 = load i32, i32* %first1.i, align 4 ; Does other work. call void (...) 
@foo() @@ -84,25 +112,25 @@ br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %3 = load i32, i32* %second2.i, align 4 %cmp2.i = icmp eq i32 %2, %3 br i1 %cmp2.i, label %land.rhs.i.2, label %opeq1.exit land.rhs.i.2: - %third.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 2 + %third.i = getelementptr inbounds %S, %S* %a, i64 0, i32 2 %4 = load i32, i32* %third.i, align 4 - %third2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3 + %third2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3 %5 = load i32, i32* %third2.i, align 4 %cmp3.i = icmp eq i32 %4, %5 br i1 %cmp3.i, label %land.rhs.i.3, label %opeq1.exit land.rhs.i.3: - %fourth.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %fourth.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %6 = load i32, i32* %fourth.i, align 4 - %fourth2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 3 + %fourth2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 3 %7 = load i32, i32* %fourth2.i, align 4 %cmp4.i = icmp eq i32 %6, %7 br label %opeq1.exit diff --git a/llvm/test/Transforms/MergeICmps/X86/two-complex-bb.ll b/llvm/test/Transforms/MergeICmps/X86/two-complex-bb.ll --- a/llvm/test/Transforms/MergeICmps/X86/two-complex-bb.ll +++ b/llvm/test/Transforms/MergeICmps/X86/two-complex-bb.ll @@ -1,23 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s --check-prefix=X86 -%"struct.std::pair" = type { i32, i32 } +%S = 
type { i32, i32 } ; This tests a function with two complex basic blocks. define zeroext i1 @twocomplexblocks( ; X86-LABEL: @twocomplexblocks( ; X86-NEXT: entry: -; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 +; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 ; X86-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 -; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B:%.*]], i64 0, i32 0 +; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 ; X86-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 ; X86-NEXT: [[EXTRAWORK:%.*]] = add i32 [[TMP0]], [[TMP1]] ; X86-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] ; X86-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] ; X86: land.rhs.i: -; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1 +; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1 ; X86-NEXT: [[TMP2:%.*]] = load i32, i32* [[SECOND_I]], align 4 -; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1 +; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1 ; X86-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 ; X86-NEXT: [[EXTRAWORK2:%.*]] = add i32 [[TMP2]], [[TMP3]] ; X86-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] @@ -26,13 +26,13 @@ ; X86-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] ; X86-NEXT: ret i1 [[TMP4]] ; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(8) %a, + %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 
{ entry: ; This is a complex BCE Basic Block. - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %1 = load i32, i32* %first1.i, align 4 %extrawork = add i32 %0, %1 %cmp.i = icmp eq i32 %0, %1 @@ -40,9 +40,9 @@ land.rhs.i: ; This is a complex BCE Basic Block. - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %3 = load i32, i32* %second2.i, align 4 %extrawork2 = add i32 %2, %3 %cmp3.i = icmp eq i32 %2, %3 @@ -52,7 +52,3 @@ %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] ret i1 %4 } - - - - diff --git a/llvm/test/Transforms/MergeICmps/X86/volatile.ll b/llvm/test/Transforms/MergeICmps/X86/volatile.ll --- a/llvm/test/Transforms/MergeICmps/X86/volatile.ll +++ b/llvm/test/Transforms/MergeICmps/X86/volatile.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mergeicmps -mtriple=x86_64-unknown-unknown -S | FileCheck %s -%"struct.std::pair" = type { i32, i32 } +%S = type { i32, i32 } define zeroext i1 @opeq( ; CHECK-LABEL: @opeq( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], %S* [[A:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[FIRST_I]], align 4 -; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds %"struct.std::pair", 
%"struct.std::pair"* [[B:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], %S* [[B:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[FIRST1_I]], align 4 ; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] ; CHECK: land.rhs.i: -; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[A]], i64 0, i32 1 +; CHECK-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], %S* [[A]], i64 0, i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[SECOND_I]], align 4 -; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* [[B]], i64 0, i32 1 +; CHECK-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], %S* [[B]], i64 0, i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SECOND2_I]], align 4 ; CHECK-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] ; CHECK-NEXT: br label [[OPEQ1_EXIT]] @@ -23,20 +23,20 @@ ; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] ; CHECK-NEXT: ret i1 [[TMP4]] ; - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { + %S* nocapture readonly dereferenceable(8) %a, + %S* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 + %first.i = getelementptr inbounds %S, %S* %a, i64 0, i32 0 %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 + %first1.i = getelementptr inbounds %S, %S* %b, i64 0, i32 0 %1 = load i32, i32* %first1.i, align 4 %cmp.i = icmp eq i32 %0, %1 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", 
%"struct.std::pair"* %a, i64 0, i32 1 + %second.i = getelementptr inbounds %S, %S* %a, i64 0, i32 1 %2 = load volatile i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 + %second2.i = getelementptr inbounds %S, %S* %b, i64 0, i32 1 %3 = load i32, i32* %second2.i, align 4 %cmp3.i = icmp eq i32 %2, %3 br label %opeq1.exit @@ -45,4 +45,3 @@ %4 = phi i1 [ false, %entry ], [ %cmp3.i, %land.rhs.i ] ret i1 %4 } -