diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -49,6 +49,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -639,10 +640,11 @@ if (Comparisons.size() == 1) { LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n"); - Value *const LhsLoad = - Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs); - Value *const RhsLoad = - Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs); + // Clone the original loads so their metadata and alignment are preserved. + Instruction *const LhsLoad = Builder.Insert(FirstCmp.Lhs().LoadI->clone()); + Instruction *const RhsLoad = Builder.Insert(FirstCmp.Rhs().LoadI->clone()); + LhsLoad->replaceUsesOfWith(LhsLoad->getOperand(0), Lhs); + RhsLoad->replaceUsesOfWith(RhsLoad->getOperand(0), Rhs); // There are no blocks to merge, just do the comparison. 
IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad); } else { diff --git a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll --- a/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll +++ b/llvm/test/Transforms/MergeICmps/X86/alias-merge-blocks.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -mtriple=x86_64-unknown-unknown -passes=mergeicmps -verify-dom-info -S | FileCheck %s --check-prefix=X86 -%S = type { i32, i32, i32, i32 } +%S = type { i32, i32, i32, i32, i32} define zeroext i1 @opeq1( ; X86-LABEL: @opeq1( @@ -54,3 +54,62 @@ %8 = phi i1 [ false, %entry ], [ false, %land.rhs.i] , [ false, %land.rhs.i.2 ], [ %cmp4.i, %land.rhs.i.3 ] ret i1 %8 } + +define zeroext i1 @part_sequent_eq_with_metadata() { +; X86-LABEL: @part_sequent_eq_with_metadata( +; X86-NEXT: bb01: +; X86-NEXT: [[A:%.*]] = alloca [[S:%.*]], align 8 +; X86-NEXT: [[B:%.*]] = alloca [[S]], align 8 +; X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !range [[RNG0:![0-9]+]], !noundef !1 +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4, !range [[RNG0]], !noundef !1 +; X86-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] +; X86-NEXT: br i1 [[TMP2]], label %"bb1+bb2+bb3", label [[EXIT:%.*]] +; X86: "bb1+bb2+bb3": +; X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 2 +; X86-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 2 +; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[TMP3]], ptr [[TMP4]], i64 12) +; X86-NEXT: [[TMP5:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: br label [[EXIT]] +; X86: exit: +; X86-NEXT: [[RET:%.*]] = phi i1 [ [[TMP5]], %"bb1+bb2+bb3" ], [ false, [[BB01:%.*]] ] +; X86-NEXT: ret i1 [[RET]] +; +bb0: + %a = alloca %S, align 8 + %b = alloca %S, align 8 + %value0 = load i32, ptr %a, align 4, !range !0, !noundef !1 + %value1 = load i32, ptr %b, align 4, !range !0, !noundef !1 + %cmp.i = 
icmp eq i32 %value0, %value1 + br i1 %cmp.i, label %bb1, label %exit + +bb1: + %second.i = getelementptr inbounds %S, ptr %a, i64 0, i32 2 + %value2 = load i32, ptr %second.i, align 4 + %second2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 2 + %value3 = load i32, ptr %second2.i, align 4 + %cmp2.i = icmp eq i32 %value2, %value3 + br i1 %cmp2.i, label %bb2, label %exit + +bb2: + %third.i = getelementptr inbounds %S, ptr %a, i64 0, i32 3 + %value4 = load i32, ptr %third.i, align 4 + %third2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 3 + %value5 = load i32, ptr %third2.i, align 4 + %cmp3.i = icmp eq i32 %value4, %value5 + br i1 %cmp3.i, label %bb3, label %exit + +bb3: + %fourth.i = getelementptr inbounds %S, ptr %a, i64 0, i32 4 + %value6 = load i32, ptr %fourth.i, align 4 + %fourth2.i = getelementptr inbounds %S, ptr %b, i64 0, i32 4 + %value7 = load i32, ptr %fourth2.i, align 4 + %cmp4.i = icmp eq i32 %value6, %value7 + br label %exit + +exit: + %ret = phi i1 [ false, %bb0 ], [ false, %bb1] , [ false, %bb2] ,[ %cmp4.i, %bb3 ] + ret i1 %ret +} + +!0 = !{i32 0, i32 2} +!1 = !{} diff --git a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled-2.ll b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled-2.ll --- a/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled-2.ll +++ b/llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled-2.ll @@ -22,12 +22,12 @@ ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[LAND_RHS1:%.*]], label [[LAND_END:%.*]] ; CHECK: land.rhs1: -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[H]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[H]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: br label [[LAND_END]] ; CHECK: land.end: -; CHECK-NEXT: [[V9:%.*]] = phi i1 
[ [[TMP7]], [[LAND_RHS1]] ], [ false, %"land.lhs.true+entry" ] +; CHECK-NEXT: [[V9:%.*]] = phi i1 [ [[TMP5]], [[LAND_RHS1]] ], [ false, %"land.lhs.true+entry" ] ; CHECK-NEXT: ret i1 [[V9]] ; entry: