diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -624,6 +624,18 @@ Value *IsEqual = nullptr; LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> " << BB->getName() << "\n"); + + // If there is one block that requires splitting, we do it now, i.e. + // just before we know we will collapse the chain. The instructions + // can be executed before any of the instructions in the chain. + const auto ToSplit = + std::find_if(Comparisons.begin(), Comparisons.end(), + [](const BCECmpBlock &B) { return B.RequireSplit; }); + if (ToSplit != Comparisons.end()) { + LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n"); + ToSplit->split(BB, AA); + } + if (Comparisons.size() == 1) { LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); Value *const LhsLoad = @@ -633,17 +645,6 @@ // There are no blocks to merge, just do the comparison. IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad); } else { - // If there is one block that requires splitting, we do it now, i.e. - // just before we know we will collapse the chain. The instructions - // can be executed before any of the instructions in the chain. - const auto ToSplit = - std::find_if(Comparisons.begin(), Comparisons.end(), - [](const BCECmpBlock &B) { return B.RequireSplit; }); - if (ToSplit != Comparisons.end()) { - LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n"); - ToSplit->split(BB, AA); - } - const unsigned TotalSizeBits = std::accumulate( Comparisons.begin(), Comparisons.end(), 0u, [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); }); diff --git a/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll b/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeICmps/X86/gep-references-bb.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -mergeicmps -verify-dom-info | FileCheck %s +target triple = "x86_64" + +%Triple = type { i32, i32, i32, i32 } + +@g = external global i32 + +; bb1 references a gep introduced in bb0. The gep must remain available after +; the merge. +define i1 @bug(%Triple* nonnull dereferenceable(16) %lhs, %Triple* nonnull dereferenceable(16) %rhs) { +; CHECK-LABEL: @bug( +; CHECK-NEXT: bb0: +; CHECK-NEXT: store i32 1, i32* @g, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr [[TRIPLE:%.*]], %Triple* [[RHS:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[L0_ADDR:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[LHS:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[L0_ADDR]], align 4 +; CHECK-NEXT: [[R0_ADDR:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[RHS]], i64 0, i32 0 +; CHECK-NEXT: [[R0:%.*]] = load i32, i32* [[R0_ADDR]], align 4 +; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i32 [[L0]], [[R0]] +; CHECK-NEXT: br i1 [[CMP0]], label %"bb1+bb2", label [[FINAL:%.*]] +; CHECK: "bb1+bb2": +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[TRIPLE]], %Triple* [[LHS]], i64 0, i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[GEP]], i64 2 +; CHECK-NEXT: [[CSTR:%.*]] = bitcast i32* [[TMP0]] to i8* +; CHECK-NEXT: [[CSTR1:%.*]] = bitcast i32* [[TMP1]] to i8* +; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[CSTR]], i8* [[CSTR1]], i64 8) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; CHECK-NEXT: br label [[FINAL]] +; CHECK: final: +; CHECK-NEXT: [[RET:%.*]] = phi i1 [ false, [[BB0:%.*]] ], [ [[TMP2]], %"bb1+bb2" ] +; CHECK-NEXT: ret i1 [[RET]] +; +bb0: + store i32 1, i32* @g + %gep = getelementptr %Triple, %Triple* %rhs, i64 0, i32 0 + %l0_addr = getelementptr inbounds %Triple, %Triple* %lhs, i64 0, i32 0 + %l0 = load i32, i32* %l0_addr, align 4 + %r0_addr = getelementptr inbounds %Triple, %Triple* %rhs, i64 0, i32 0 + %r0 = load i32, i32* %r0_addr, align 4 + %cmp0 = icmp eq i32 %l0, %r0 + br i1 %cmp0, label %bb1, label %final + +bb1: ; preds = %bb0 + %l1_addr = getelementptr inbounds %Triple, %Triple* %lhs, i64 0, i32 2 + %l1 = load i32, i32* %l1_addr, align 4 + %r1_addr = getelementptr inbounds i32, i32* %gep, i64 2 + %r1 = load i32, i32* %r1_addr, align 4 + %cmp1 = icmp eq i32 %l1, %r1 + br i1 %cmp1, label %bb2, label %final + +bb2: ; preds = %bb1 + %l2_addr = getelementptr inbounds %Triple, %Triple* %lhs, i64 0, i32 3 + %l2 = load i32, i32* %l2_addr, align 4 + %r2_addr = getelementptr inbounds i32, i32* %gep, i64 3 + %r2 = load i32, i32* %r2_addr, align 4 + %cmp2 = icmp eq i32 %l2, %r2 + br label %final + +final: ; preds = %bb2, %bb1, %bb0 + %ret = phi i1 [ false, %bb0 ], [ false, %bb1 ], [ %cmp2, %bb2 ] + ret i1 %ret +} +