Index: llvm/lib/Transforms/Scalar/MergeICmps.cpp =================================================================== --- llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -440,13 +440,6 @@ mergeBlocks(std::vector &&Blocks) { std::vector MergedBlocks; - // Sort to detect continuous offsets. - llvm::sort(Blocks, - [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) { - return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) < - std::tie(RhsBlock.Lhs(), RhsBlock.Rhs()); - }); - BCECmpChain::ContiguousBlocks *LastMergedBlock = nullptr; for (BCECmpBlock &Block : Blocks) { if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(), Block)) { @@ -498,7 +491,7 @@ // As once split, there will still be instructions before the BCE cmp // instructions that do other work in program order, i.e. within the // chain before sorting. Unless we can abort the chain at this point - // and start anew. + // and start anew. // // NOTE: we only handle blocks a with single predecessor for now. 
if (Comparison->canSplit(AA)) { Index: llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled-2.ll =================================================================== --- llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled-2.ll +++ llvm/test/Transforms/MergeICmps/X86/entry-block-shuffled-2.ll @@ -11,23 +11,31 @@ define i1 @test() { ; CHECK-LABEL: @test( -; CHECK-NEXT: "land.lhs.true+entry": +; CHECK-NEXT: entry: ; CHECK-NEXT: [[H:%.*]] = alloca %"struct.a::c", align 8 ; CHECK-NEXT: [[I:%.*]] = alloca %"struct.a::c", align 8 ; CHECK-NEXT: call void @init(ptr [[H]]) ; CHECK-NEXT: call void @init(ptr [[I]]) -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds %"struct.a::c", ptr [[H]], i64 0, i32 1 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds %"struct.a::c", ptr [[I]], i64 0, i32 1 -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[TMP0]], ptr [[TMP1]], i64 16) -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: br i1 [[TMP2]], label [[LAND_RHS1:%.*]], label [[LAND_END:%.*]] -; CHECK: land.rhs1: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[H]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[E:%.*]] = getelementptr inbounds %"struct.a::c", ptr [[H]], i64 0, i32 2 +; CHECK-NEXT: [[V3:%.*]] = load ptr, ptr [[E]], align 8 +; CHECK-NEXT: [[E2:%.*]] = getelementptr inbounds %"struct.a::c", ptr [[I]], i64 0, i32 2 +; CHECK-NEXT: [[V4:%.*]] = load ptr, ptr [[E2]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[V3]], [[V4]] +; CHECK-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[LAND_END:%.*]] +; CHECK: land.lhs.true: +; CHECK-NEXT: [[D:%.*]] = getelementptr inbounds %"struct.a::c", ptr [[H]], i64 0, i32 1 +; CHECK-NEXT: [[V5:%.*]] = load ptr, ptr [[D]], align 8 +; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds %"struct.a::c", ptr [[I]], i64 0, i32 1 +; CHECK-NEXT: [[V6:%.*]] = load ptr, ptr [[D3]], align 8 +; CHECK-NEXT: 
[[CMP4:%.*]] = icmp eq ptr [[V5]], [[V6]] +; CHECK-NEXT: br i1 [[CMP4]], label [[LAND_RHS:%.*]], label [[LAND_END]] +; CHECK: land.rhs: +; CHECK-NEXT: [[V7:%.*]] = load i32, ptr [[H]], align 8 +; CHECK-NEXT: [[V8:%.*]] = load i32, ptr [[I]], align 8 +; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i32 [[V7]], [[V8]] ; CHECK-NEXT: br label [[LAND_END]] ; CHECK: land.end: -; CHECK-NEXT: [[V9:%.*]] = phi i1 [ [[TMP5]], [[LAND_RHS1]] ], [ false, %"land.lhs.true+entry" ] +; CHECK-NEXT: [[V9:%.*]] = phi i1 [ false, [[LAND_LHS_TRUE]] ], [ false, [[ENTRY:%.*]] ], [ [[CMP6]], [[LAND_RHS]] ] ; CHECK-NEXT: ret i1 [[V9]] ; entry: Index: llvm/test/Transforms/MergeICmps/X86/no-gep-other-work.ll =================================================================== --- llvm/test/Transforms/MergeICmps/X86/no-gep-other-work.ll +++ llvm/test/Transforms/MergeICmps/X86/no-gep-other-work.ll @@ -10,13 +10,22 @@ define i1 @test(ptr dereferenceable(2) %arg, ptr dereferenceable(2) %arg1) { ; CHECK-LABEL: @test( -; CHECK-NEXT: "if+entry": +; CHECK-NEXT: entry: ; CHECK-NEXT: call void @other_work() -; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[ARG:%.*]], ptr [[ARG1:%.*]], i64 2) -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK-NEXT: [[ARG_OFF:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1 +; CHECK-NEXT: [[ARG1_OFF:%.*]] = getelementptr inbounds i8, ptr [[ARG1:%.*]], i64 1 +; CHECK-NEXT: [[ARG_OFF_VAL:%.*]] = load i8, ptr [[ARG_OFF]], align 1 +; CHECK-NEXT: [[ARG1_OFF_VAL:%.*]] = load i8, ptr [[ARG1_OFF]], align 1 +; CHECK-NEXT: [[CMP_OFF:%.*]] = icmp eq i8 [[ARG_OFF_VAL]], [[ARG1_OFF_VAL]] +; CHECK-NEXT: br i1 [[CMP_OFF]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: [[ARG_VAL:%.*]] = load i8, ptr [[ARG]], align 1 +; CHECK-NEXT: [[ARG1_VAL:%.*]] = load i8, ptr [[ARG1]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ARG_VAL]], [[ARG1_VAL]] +; CHECK-NEXT: br label [[JOIN]] ; CHECK: join: -; CHECK-NEXT: ret i1 
[[TMP0]] +; CHECK-NEXT: [[PHI:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP]], [[IF]] ] +; CHECK-NEXT: ret i1 [[PHI]] ; entry: call void @other_work() Index: llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll =================================================================== --- llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll +++ llvm/test/Transforms/MergeICmps/X86/pair-int32-int32.ll @@ -58,12 +58,21 @@ ; Same as above, but the two blocks are in inverse order. define zeroext i1 @opeq1_inverse( ; X86-LABEL: @opeq1_inverse( -; X86-NEXT: "land.rhs.i+entry": -; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[A:%.*]], ptr [[B:%.*]], i64 8) -; X86-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 -; X86-NEXT: br label [[OPEQ1_EXIT:%.*]] +; X86-NEXT: entry: +; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], ptr [[A:%.*]], i64 0, i32 1 +; X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[FIRST_I]], align 4 +; X86-NEXT: [[FIRST1_I:%.*]] = getelementptr inbounds [[S]], ptr [[B:%.*]], i64 0, i32 1 +; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[FIRST1_I]], align 4 +; X86-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] +; X86-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] +; X86: land.rhs.i: +; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 +; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 +; X86-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] +; X86-NEXT: br label [[OPEQ1_EXIT]] ; X86: opeq1.exit: -; X86-NEXT: ret i1 [[TMP0]] +; X86-NEXT: [[TMP4:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[CMP3_I]], [[LAND_RHS_I]] ] +; X86-NEXT: ret i1 [[TMP4]] ; ; X86-NOBUILTIN-LABEL: @opeq1_inverse( ; X86-NOBUILTIN-NEXT: entry: Index: llvm/test/Transforms/MergeICmps/X86/pr62459.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/MergeICmps/X86/pr62459.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s 
-passes=mergeicmps -verify-dom-info -S -mtriple=x86_64-unknown-unknown | FileCheck %s + +%struct1.S= type { i32, i32, i32 } + +; Negative test: https://alive2.llvm.org/ce/z/KQjp2K +define i1 @cmp_eq_revert_order(ptr nocapture noundef nonnull readonly dereferenceable(24) %0, +; CHECK-LABEL: @cmp_eq_revert_order( +; CHECK-NEXT: bb1: +; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds [[STRUCT1_S:%.*]], ptr [[TMP0:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[S2]], align 4 +; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds [[STRUCT1_S]], ptr [[TMP1:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[S3]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V2]], [[V3]] +; CHECK-NEXT: br i1 [[CMP1]], label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[S4:%.*]] = getelementptr inbounds [[STRUCT1_S]], ptr [[TMP0]], i64 0, i32 0 +; CHECK-NEXT: [[V4:%.*]] = load i32, ptr [[S4]], align 4 +; CHECK-NEXT: [[S5:%.*]] = getelementptr inbounds [[STRUCT1_S]], ptr [[TMP1]], i64 0, i32 0 +; CHECK-NEXT: [[V5:%.*]] = load i32, ptr [[S5]], align 4 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[V4]], [[V5]] +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[CMP3:%.*]] = phi i1 [ false, [[BB1:%.*]] ], [ [[CMP2]], [[BB2]] ] +; CHECK-NEXT: ret i1 [[CMP3]] +; + ptr nocapture noundef nonnull readonly dereferenceable(24) %1) { +bb1: + %s2 = getelementptr inbounds %struct1.S, ptr %0, i64 0, i32 1 + %v2 = load i32, ptr %s2, align 4 + %s3 = getelementptr inbounds %struct1.S, ptr %1, i64 0, i32 1 + %v3 = load i32, ptr %s3, align 4 + %cmp1 = icmp eq i32 %v2, %v3 + br i1 %cmp1, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + %s4 = getelementptr inbounds %struct1.S, ptr %0, i64 0, i32 0 + %v4 = load i32, ptr %s4, align 4 + %s5 = getelementptr inbounds %struct1.S, ptr %1, i64 0, i32 0 + %v5 = load i32, ptr %s5, align 4 + %cmp2 = icmp eq i32 %v4, %v5 + br label %bb3 + +bb3: ; preds = %bb2, %bb1 + %cmp3 = phi i1 [ false, %bb1 ], [ %cmp2, 
%bb2 ] + ret i1 %cmp3 +} Index: llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll =================================================================== --- llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll +++ llvm/test/Transforms/MergeICmps/X86/split-block-does-work.ll @@ -60,37 +60,29 @@ ; We will not be able to merge anything, make sure the call is not moved out. define zeroext i1 @opeq1_discontiguous( ; X86-LABEL: @opeq1_discontiguous( -; X86-NEXT: entry: -; X86-NEXT: [[FIRST_I:%.*]] = getelementptr inbounds [[S:%.*]], ptr [[A:%.*]], i64 0, i32 1 -; X86-NEXT: [[TMP0:%.*]] = load i32, ptr [[FIRST_I]], align 4 -; X86-NEXT: [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4 +; X86-NEXT: "entry+land.rhs.i": ; X86-NEXT: call void (...) @foo() #[[ATTR2]] -; X86-NEXT: [[CMP_I:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] -; X86-NEXT: br i1 [[CMP_I]], label [[LAND_RHS_I:%.*]], label [[OPEQ1_EXIT:%.*]] -; X86: land.rhs.i: -; X86-NEXT: [[SECOND_I:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 2 -; X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[SECOND_I]], align 4 -; X86-NEXT: [[SECOND2_I:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 1 -; X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[SECOND2_I]], align 4 -; X86-NEXT: [[CMP2_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]] -; X86-NEXT: br i1 [[CMP2_I]], label [[LAND_RHS_I_2:%.*]], label [[OPEQ1_EXIT]] -; X86: land.rhs.i.2: -; X86-NEXT: [[THIRD_I:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 2 -; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[THIRD_I]], align 4 -; X86-NEXT: [[THIRD2_I:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 3 -; X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[THIRD2_I]], align 4 -; X86-NEXT: [[CMP3_I:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] -; X86-NEXT: br i1 [[CMP3_I]], label [[LAND_RHS_I_3:%.*]], label [[OPEQ1_EXIT]] -; X86: land.rhs.i.3: -; X86-NEXT: [[FOURTH_I:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 1 -; X86-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[FOURTH_I]], align 4 -; X86-NEXT: [[FOURTH2_I:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 3 -; X86-NEXT: [[TMP7:%.*]] = load i32, ptr [[FOURTH2_I]], align 4 -; X86-NEXT: [[CMP4_I:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]] +; X86-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[S:%.*]], ptr [[A:%.*]], i64 0, i32 1 +; X86-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[TMP0]], ptr [[B:%.*]], i64 8) +; X86-NEXT: [[TMP1:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; X86-NEXT: br i1 [[TMP1]], label [[LAND_RHS_I_22:%.*]], label [[OPEQ1_EXIT:%.*]] +; X86: land.rhs.i.22: +; X86-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 2 +; X86-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 3 +; X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +; X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +; X86-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] +; X86-NEXT: br i1 [[TMP6]], label [[LAND_RHS_I_31:%.*]], label [[OPEQ1_EXIT]] +; X86: land.rhs.i.31: +; X86-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[S]], ptr [[A]], i64 0, i32 1 +; X86-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[S]], ptr [[B]], i64 0, i32 3 +; X86-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 +; X86-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +; X86-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP9]], [[TMP10]] ; X86-NEXT: br label [[OPEQ1_EXIT]] ; X86: opeq1.exit: -; X86-NEXT: [[TMP8:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ false, [[LAND_RHS_I]] ], [ false, [[LAND_RHS_I_2]] ], [ [[CMP4_I]], [[LAND_RHS_I_3]] ] -; X86-NEXT: ret i1 [[TMP8]] +; X86-NEXT: [[TMP12:%.*]] = phi i1 [ [[TMP11]], [[LAND_RHS_I_31]] ], [ false, [[LAND_RHS_I_22]] ], [ false, %"entry+land.rhs.i" ] +; X86-NEXT: ret i1 [[TMP12]] ; ; Make sure this call is moved in the entry block. ptr nocapture readonly dereferenceable(16) %a,