diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -624,6 +624,18 @@ Value *IsEqual = nullptr; LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> " << BB->getName() << "\n"); + + // If there is one block that requires splitting, we do it now, i.e. + // just before we know we will collapse the chain. The instructions + // can be executed before any of the instructions in the chain. + const auto ToSplit = + std::find_if(Comparisons.begin(), Comparisons.end(), + [](const BCECmpBlock &B) { return B.RequireSplit; }); + if (ToSplit != Comparisons.end()) { + LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n"); + ToSplit->split(BB, AA); + } + if (Comparisons.size() == 1) { LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); Value *const LhsLoad = @@ -633,17 +645,6 @@ // There are no blocks to merge, just do the comparison. IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad); } else { - // If there is one block that requires splitting, we do it now, i.e. - // just before we know we will collapse the chain. The instructions - // can be executed before any of the instructions in the chain. 
- const auto ToSplit = - std::find_if(Comparisons.begin(), Comparisons.end(), - [](const BCECmpBlock &B) { return B.RequireSplit; }); - if (ToSplit != Comparisons.end()) { - LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n"); - ToSplit->split(BB, AA); - } - const unsigned TotalSizeBits = std::accumulate( Comparisons.begin(), Comparisons.end(), 0u, [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); }); diff --git a/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll b/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -mergeicmps -verify-dom-info | FileCheck %s +target triple = "x86_64" + +%Triple = type { %Elem0, %Elem1, %Elem2 } +%Elem0 = type { i32 } +%Elem1 = type { i32 } +%Elem2 = type { i32 } + +;; %gep is defined in bb0 rather than being an argument. bb0 must be split so +;; that %gep is kept; otherwise the newly created gep may reference undef (after +;; the basic block defining the pointer operand is deleted). 
+define i1 @bug(%Triple* nonnull dereferenceable(12) %lhs, %Triple* nonnull dereferenceable(12) %rhs) { +; CHECK-LABEL: @bug( +; CHECK-NEXT: bb02: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [[TRIPLE:%.*]], %Triple* [[RHS:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[LHS:%.*]], i64 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[RHS]], i64 0, i32 0, i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br i1 [[TMP4]], label %"bb1+bb2", label [[FINAL:%.*]] +; CHECK: "bb1+bb2": +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[LHS]], i64 0, i32 1, i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[ELEM0:%.*]], %Elem0* [[GEP]], i64 1, i32 0 +; CHECK-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP5]] to i8* +; CHECK-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP6]] to i8* +; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; CHECK-NEXT: br label [[FINAL]] +; CHECK: final: +; CHECK-NEXT: [[RET:%.*]] = phi i1 [ [[TMP7]], %"bb1+bb2" ], [ false, [[BB02:%.*]] ] +; CHECK-NEXT: ret i1 [[RET]] +; +bb0: + %gep = getelementptr %Triple, %Triple* %rhs, i64 0, i32 0 + %l0_addr = getelementptr inbounds %Triple, %Triple* %lhs, i64 0, i32 0, i32 0 + %l0 = load i32, i32* %l0_addr, align 4 + %r0_addr = getelementptr inbounds %Triple, %Triple* %rhs, i64 0, i32 0, i32 0 + %r0 = load i32, i32* %r0_addr, align 4 + %cmp0 = icmp eq i32 %l0, %r0 + br i1 %cmp0, label %bb1, label %final + +bb1: ; preds = %bb0 + %l1_addr = getelementptr inbounds %Triple, %Triple* %lhs, i64 0, i32 1, i32 0 + %l1 = load i32, i32* %l1_addr, align 4 + %r1_addr = getelementptr inbounds %Elem0, %Elem0* %gep, i64 1, i32 0 + %r1 = load i32, i32* %r1_addr, align 4 + 
%cmp1 = icmp eq i32 %l1, %r1 + br i1 %cmp1, label %bb2, label %final + +bb2: ; preds = %bb1 + %l2_addr = getelementptr inbounds %Triple, %Triple* %lhs, i64 0, i32 2, i32 0 + %l2 = load i32, i32* %l2_addr, align 4 + %r2_addr = getelementptr inbounds %Elem0, %Elem0* %gep, i64 2, i32 0 + %r2 = load i32, i32* %r2_addr, align 4 + %cmp2 = icmp eq i32 %l2, %r2 + br label %final + +final: ; preds = %bb2, %bb1, %bb0 + %ret = phi i1 [ false, %bb0 ], [ false, %bb1 ], [ %cmp2, %bb2 ] + ret i1 %ret +} +