Index: llvm/include/llvm/Transforms/InstCombine/InstCombine.h
===================================================================
--- llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -25,10 +25,12 @@
 
 namespace llvm {
 
-static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
+static constexpr unsigned InstCombineDefaultMaxIterations = 1;
 
 struct InstCombineOptions {
   bool UseLoopInfo = false;
+  // Verify that a fix point has been reached after MaxIterations.
+  bool VerifyFixpoint = true;
   unsigned MaxIterations = InstCombineDefaultMaxIterations;
 
   InstCombineOptions() = default;
@@ -38,6 +40,11 @@
     return *this;
   }
 
+  InstCombineOptions &setVerifyFixpoint(bool Value) {
+    VerifyFixpoint = Value;
+    return *this;
+  }
+
   InstCombineOptions &setMaxIterations(unsigned Value) {
     MaxIterations = Value;
     return *this;
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -852,6 +852,8 @@
     bool Enable = !ParamName.consume_front("no-");
     if (ParamName == "use-loop-info") {
       Result.setUseLoopInfo(Enable);
+    } else if (ParamName == "verify-fixpoint") {
+      Result.setVerifyFixpoint(Enable);
     } else if (Enable && ParamName.consume_front("max-iterations=")) {
       APInt MaxIterations;
       if (ParamName.getAsInteger(0, MaxIterations))
Index: llvm/lib/Passes/PassBuilderPipelines.cpp
===================================================================
--- llvm/lib/Passes/PassBuilderPipelines.cpp
+++ llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -366,6 +366,12 @@
          Phase == ThinOrFullLTOPhase::FullLTOPreLink;
 }
 
+static InstCombinePass createInstCombinePass() {
+  // InstCombine passes in the optimization pipeline should not verify that
+  // a fixpoint has been reached.
+  return InstCombinePass(InstCombineOptions().setVerifyFixpoint(false));
+}
+
 // TODO: Investigate the cost/benefit of tail call elimination on debugging.
 FunctionPassManager
 PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
@@ -386,7 +392,7 @@
   // Hoisting of scalars and load expressions.
   FPM.addPass(
       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
 
   FPM.addPass(LibCallsShrinkWrapPass());
 
@@ -463,7 +469,7 @@
                                               /*UseBlockFrequencyInfo=*/true));
   FPM.addPass(
       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
   // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
   // *All* loop passes must preserve it, in order to be able to use it.
   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
@@ -488,7 +494,7 @@
 
   // Run instcombine after redundancy and dead bit elimination to exploit
   // opportunities opened up by them.
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
   FPM.addPass(CoroElidePass());
@@ -501,7 +507,7 @@
   FPM.addPass(ADCEPass());
   FPM.addPass(
       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
   return FPM;
@@ -551,7 +557,7 @@
 
   FPM.addPass(
       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
   FPM.addPass(AggressiveInstCombinePass());
 
   if (EnableConstraintElimination)
@@ -642,7 +648,7 @@
                                               /*UseBlockFrequencyInfo=*/true));
   FPM.addPass(
       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
   // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
   // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
   // *All* loop passes must preserve it, in order to be able to use it.
@@ -676,7 +682,7 @@
 
   // Run instcombine after redundancy and dead bit elimination to exploit
   // opportunities opened up by them.
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
   // Re-consider control flow based optimizations after redundancy elimination,
@@ -711,7 +717,7 @@
                                   .convertSwitchRangeToICmp(true)
                                   .hoistCommonInsts(true)
                                   .sinkCommonInsts(true)));
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
   return FPM;
@@ -749,7 +755,7 @@
     FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
     FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
         true)));                    // Merge & remove basic blocks.
-    FPM.addPass(InstCombinePass()); // Combine silly sequences.
+    FPM.addPass(createInstCombinePass()); // Combine silly sequences.
     invokePeepholeEPCallbacks(FPM, Level);
 
     CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
@@ -1081,7 +1087,7 @@
   FunctionPassManager GlobalCleanupPM;
   // FIXME: Should this instead by a run of SROA?
   GlobalCleanupPM.addPass(PromotePass());
-  GlobalCleanupPM.addPass(InstCombinePass());
+  GlobalCleanupPM.addPass(createInstCombinePass());
   invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
 
   GlobalCleanupPM.addPass(
       SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
@@ -1165,7 +1171,7 @@
     FPM.addPass(LoopLoadEliminationPass());
   }
   // Cleanup after the loop optimization passes.
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
 
   if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
     ExtraVectorPassManager ExtraPasses;
@@ -1177,7 +1183,7 @@
     // dead (or speculatable) control flows or more combining opportunities.
     ExtraPasses.addPass(EarlyCSEPass());
     ExtraPasses.addPass(CorrelatedValuePropagationPass());
-    ExtraPasses.addPass(InstCombinePass());
+    ExtraPasses.addPass(createInstCombinePass());
     LoopPassManager LPM;
     LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                          /*AllowSpeculation=*/true));
@@ -1188,7 +1194,7 @@
                                              /*UseBlockFrequencyInfo=*/true));
     ExtraPasses.addPass(
         SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
-    ExtraPasses.addPass(InstCombinePass());
+    ExtraPasses.addPass(createInstCombinePass());
     FPM.addPass(std::move(ExtraPasses));
   }
 
@@ -1211,7 +1217,7 @@
 
   if (IsFullLTO) {
     FPM.addPass(SCCPPass());
-    FPM.addPass(InstCombinePass());
+    FPM.addPass(createInstCombinePass());
     FPM.addPass(BDCEPass());
   }
 
@@ -1226,7 +1232,7 @@
   FPM.addPass(VectorCombinePass());
 
   if (!IsFullLTO) {
-    FPM.addPass(InstCombinePass());
+    FPM.addPass(createInstCombinePass());
     // Unroll small loops to hide loop backedge latency and saturate any
     // parallel execution resources of an out-of-order processor. We also then
     // need to clean up redundancies and loop invariant code.
@@ -1251,7 +1257,7 @@
     FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
   }
 
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
 
   // This is needed for two reasons:
   //   1. It works around problems that instcombine introduces, such as sinking
@@ -1729,7 +1735,7 @@
   // function pointers. When this happens, we often have to resolve varargs
   // calls, etc, so let instcombine do this.
   FunctionPassManager PeepholeFPM;
-  PeepholeFPM.addPass(InstCombinePass());
+  PeepholeFPM.addPass(createInstCombinePass());
   if (Level.getSpeedupLevel() > 1)
     PeepholeFPM.addPass(AggressiveInstCombinePass());
   invokePeepholeEPCallbacks(PeepholeFPM, Level);
@@ -1775,7 +1781,7 @@
   FunctionPassManager FPM;
 
   // The IPO Passes may leave cruft around. Clean up after them.
-  FPM.addPass(InstCombinePass());
+  FPM.addPass(createInstCombinePass());
   invokePeepholeEPCallbacks(FPM, Level);
 
   if (EnableConstraintElimination)
Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -146,6 +146,8 @@
     "instcombine-max-sink-users", cl::init(32),
     cl::desc("Maximum number of undroppable users for instruction sinking"));
 
+// FIXME: Remove this option, it has been superseded by verify-fixpoint.
+// Only keeping it for now to avoid unnecessary test churn in this patch.
 static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
     "instcombine-infinite-loop-threshold",
     cl::desc("Number of instruction combining iterations considered an "
@@ -4249,7 +4251,8 @@
     Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
     AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
     DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
-    ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
+    ProfileSummaryInfo *PSI, unsigned MaxIterations, bool VerifyFixpoint,
+    LoopInfo *LI) {
   auto &DL = F.getParent()->getDataLayout();
 
   /// Builder - This is an IRBuilder that automatically inserts new
@@ -4273,35 +4276,36 @@
   // Iterate while there is work to do.
   unsigned Iteration = 0;
   while (true) {
+    bool MadeChangeInThisIteration = false;
     ++NumWorklistIterations;
     ++Iteration;
 
-    if (Iteration > InfiniteLoopDetectionThreshold) {
-      report_fatal_error(
-          "Instruction Combining seems stuck in an infinite loop after " +
-          Twine(InfiniteLoopDetectionThreshold) + " iterations.");
-    }
-
-    if (Iteration > MaxIterations) {
+    if (Iteration > MaxIterations && !VerifyFixpoint) {
       LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations
                         << " on " << F.getName()
-                        << " reached; stopping before reaching a fixpoint\n");
+                        << " reached; stopping without verifying fixpoint\n");
       break;
     }
 
     LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
                       << F.getName() << "\n");
 
-    MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist, RPOT);
+    MadeChangeInThisIteration |=
+        prepareICWorklistFromFunction(F, DL, &TLI, Worklist, RPOT);
 
     InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI,
                         DT, ORE, BFI, PSI, DL, LI);
     IC.MaxArraySizeForCombine = MaxArraySize;
-
-    if (!IC.run())
+    MadeChangeInThisIteration |= IC.run();
+    if (!MadeChangeInThisIteration)
       break;
 
     MadeIRChange = true;
+    if (Iteration > MaxIterations) {
+      report_fatal_error(
+          "Instruction Combining did not reach a fixpoint after " +
+          Twine(MaxIterations) + " iterations");
+    }
   }
 
   if (Iteration == 1)
@@ -4324,7 +4328,8 @@
       OS, MapClassName2PassName);
   OS << '<';
   OS << "max-iterations=" << Options.MaxIterations << ";";
-  OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info";
+  OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info;";
+  OS << (Options.VerifyFixpoint ? "" : "no-") << "verify-fixpoint";
   OS << '>';
 }
 
@@ -4350,7 +4355,8 @@
           &AM.getResult<LoopAnalysis>(F) : nullptr;
 
   if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
-                                       BFI, PSI, Options.MaxIterations, LI))
+                                       BFI, PSI, Options.MaxIterations,
+                                       Options.VerifyFixpoint, LI))
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
 
@@ -4400,7 +4406,8 @@
 
   return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
                                          BFI, PSI,
-                                         InstCombineDefaultMaxIterations, LI);
+                                         InstCombineDefaultMaxIterations,
+                                         /*VerifyFixpoint */ false, LI);
 }
 
 char InstructionCombiningPass::ID = 0;
Index: llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll
===================================================================
--- llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll
+++ llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
+
+; FIXME: This does not currently reach a fix point, because an assume can only
+; be propagated backwards after its argument has been simplified.
 
 define i32 @computeNumSignBits_add1(i32 %in) {
 ; CHECK-LABEL: @computeNumSignBits_add1(
@@ -48,7 +51,7 @@
 define i32 @computeNumSignBits_sub2(i32 %in) {
 ; CHECK-LABEL: @computeNumSignBits_sub2(
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw i32 [[IN:%.*]], -1
+; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[IN:%.*]], -1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[SUB]], 43
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[COND]])
 ; CHECK-NEXT:    [[SH:%.*]] = shl nuw nsw i32 [[SUB]], 3
Index: llvm/test/Other/new-pm-print-pipeline.ll
===================================================================
--- llvm/test/Other/new-pm-print-pipeline.ll
+++ llvm/test/Other/new-pm-print-pipeline.ll
@@ -95,8 +95,8 @@
 ; CHECK-27: function(separate-const-offset-from-gep)
 
 ;; Test InstCombine options - the first pass checks default settings, and the second checks customized options.
-; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(instcombine,instcombine)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-28
-; CHECK-28: function(instcombine,instcombine)
+; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(instcombine,instcombine)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-28
+; CHECK-28: function(instcombine,instcombine)
 
 ;; Test function-attrs
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='cgscc(function-attrs)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-29
Index: llvm/test/Transforms/InstCombine/constant-fold-iteration.ll
===================================================================
--- llvm/test/Transforms/InstCombine/constant-fold-iteration.ll
+++ llvm/test/Transforms/InstCombine/constant-fold-iteration.ll
@@ -1,7 +1,10 @@
-; RUN: opt < %s -passes=instcombine -S -debug 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S -debug 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
 
+; This test disables fixpoint verification, because that would cause a second
+; iteration for verification.
+
 define i32 @a() nounwind readnone {
 entry:
   ret i32 zext (i1 icmp eq (i32 0, i32 ptrtoint (ptr @a to i32)) to i32)
Index: llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
===================================================================
--- llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
+++ llvm/test/Transforms/InstCombine/merging-multiple-stores-into-successor.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -instcombine-infinite-loop-threshold=3 -S | FileCheck %s
+; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
+
+; FIXME: This currently doesn't reach a fix point, because we don't
+; canonicalize the operand order of newly added phi nodes.
 
 @var_7 = external global i8, align 1
 @var_1 = external global i32, align 4
@@ -28,11 +31,12 @@
 ; CHECK-NEXT:    br label [[BB12]]
 ; CHECK:       bb12:
 ; CHECK-NEXT:    [[STOREMERGE1:%.*]] = phi i32 [ [[I11]], [[BB10]] ], [ 1, [[BB9]] ]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ 1, [[BB9]] ], [ [[I11]], [[BB10]] ]
 ; CHECK-NEXT:    store i32 [[STOREMERGE1]], ptr @arr_2, align 4
 ; CHECK-NEXT:    store i16 [[I4]], ptr @arr_4, align 2
 ; CHECK-NEXT:    [[I8:%.*]] = sext i16 [[I4]] to i32
 ; CHECK-NEXT:    store i32 [[I8]], ptr @arr_3, align 16
-; CHECK-NEXT:    store i32 [[STOREMERGE1]], ptr getelementptr inbounds ([0 x i32], ptr @arr_2, i64 0, i64 1), align 4
+; CHECK-NEXT:    store i32 [[STOREMERGE]], ptr getelementptr inbounds ([0 x i32], ptr @arr_2, i64 0, i64 1), align 4
 ; CHECK-NEXT:    store i16 [[I4]], ptr getelementptr inbounds ([0 x i16], ptr @arr_4, i64 0, i64 1), align 2
 ; CHECK-NEXT:    store i32 [[I8]], ptr getelementptr inbounds ([8 x i32], ptr @arr_3, i64 0, i64 1), align 4
 ; CHECK-NEXT:    ret void
@@ -275,17 +279,16 @@
 }
 
 define ptr @inttoptr_merge(i1 %cond, i64 %a, ptr %b) {
-; CHECK-LABEL: define ptr @inttoptr_merge
-; CHECK-SAME: (i1 [[COND:%.*]], i64 [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-LABEL: @inttoptr_merge(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
 ; CHECK:       BB0:
-; CHECK-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[A]] to ptr
+; CHECK-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[A:%.*]] to ptr
 ; CHECK-NEXT:    br label [[SINK:%.*]]
 ; CHECK:       BB1:
 ; CHECK-NEXT:    br label [[SINK]]
 ; CHECK:       sink:
-; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi ptr [ [[B]], [[BB1]] ], [ [[TMP0]], [[BB0]] ]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi ptr [ [[B:%.*]], [[BB1]] ], [ [[TMP0]], [[BB0]] ]
 ; CHECK-NEXT:    ret ptr [[STOREMERGE]]
 ;
 entry:
Index: llvm/test/Transforms/InstCombine/pr55228.ll
===================================================================
--- llvm/test/Transforms/InstCombine/pr55228.ll
+++ llvm/test/Transforms/InstCombine/pr55228.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt -S -passes='instcombine<no-verify-fixpoint>' < %s | FileCheck %s
+
+; This does not reach a fixpoint, because the global initializer is not in
+; folded form. This will not happen if preceded by a GlobalOpt run.
 
 target datalayout = "p:8:8"
 
@@ -8,7 +11,7 @@
 
 define i1 @test(ptr %p) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[P:%.*]], getelementptr inbounds (i8, ptr @g, i8 1)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[P:%.*]], getelementptr inbounds (i8, ptr @g, i64 1)
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %alloca = alloca ptr
Index: llvm/test/Transforms/InstCombine/shift.ll
===================================================================
--- llvm/test/Transforms/InstCombine/shift.ll
+++ llvm/test/Transforms/InstCombine/shift.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
+
+; The fuzzer-generated @ashr_out_of_range test case does not reach a fixpoint,
+; because a logical and is not relaxed to a bitwise and in one iteration.
 
 declare void @use(i64)
 declare void @use_i32(i32)
@@ -1719,7 +1722,7 @@
 ; CHECK-NEXT:    [[L7:%.*]] = load i177, ptr [[G11]], align 4
 ; CHECK-NEXT:    [[L7_FROZEN:%.*]] = freeze i177 [[L7]]
 ; CHECK-NEXT:    [[C171:%.*]] = icmp slt i177 [[L7_FROZEN]], 0
-; CHECK-NEXT:    [[C17:%.*]] = and i1 [[TMP1]], [[C171]]
+; CHECK-NEXT:    [[C17:%.*]] = select i1 [[TMP1]], i1 [[C171]], i1 false
 ; CHECK-NEXT:    [[TMP3:%.*]] = sext i1 [[C17]] to i64
 ; CHECK-NEXT:    [[G62:%.*]] = getelementptr i177, ptr [[G11]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i177 [[L7_FROZEN]], -1
Index: llvm/test/Transforms/PGOProfile/chr.ll
===================================================================
--- llvm/test/Transforms/PGOProfile/chr.ll
+++ llvm/test/Transforms/PGOProfile/chr.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes='require<profile-summary>,function(chr,instcombine,simplifycfg)' -S | FileCheck %s
+; RUN: opt < %s -passes='require<profile-summary>,function(chr,instcombine<no-verify-fixpoint>,simplifycfg)' -S | FileCheck %s
+
+; FIXME: This does not currently reach a fix point, because we don't make use
+; of a freeze that is pushed up the instruction chain later.
 
 declare void @foo()
 declare void @bar()
@@ -1932,13 +1935,13 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[J_FR:%.*]] = freeze i64 [[J:%.*]]
 ; CHECK-NEXT:    [[I_FR:%.*]] = freeze i64 [[I:%.*]]
-; CHECK-NEXT:    [[K_FR:%.*]] = freeze i64 [[K:%.*]]
-; CHECK-NEXT:    [[CMP0:%.*]] = icmp ne i64 [[J_FR]], [[K_FR]]
+; CHECK-NEXT:    [[CMP0:%.*]] = icmp ne i64 [[J_FR]], [[K:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i1 [[CMP0]]
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i64 [[I_FR]], [[J_FR]]
 ; CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne i64 [[I_FR]], 86
-; CHECK-NEXT:    [[TMP0:%.*]] = and i1 [[CMP0]], [[CMP3]]
-; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[TMP0]], [[CMP_I]]
-; CHECK-NEXT:    br i1 [[TMP1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i1 [[TMP0]], [[CMP3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], [[CMP_I]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i64 [[I_FR]], 2
 ; CHECK-NEXT:    switch i64 [[I_FR]], label [[BB2:%.*]] [
@@ -1962,7 +1965,7 @@
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    br label [[BB10:%.*]]
 ; CHECK:       entry.split.nonchr:
-; CHECK-NEXT:    br i1 [[CMP0]], label [[BB1_NONCHR:%.*]], label [[BB10]], !prof [[PROF18]]
+; CHECK-NEXT:    br i1 [[TMP0]], label [[BB1_NONCHR:%.*]], label [[BB10]], !prof [[PROF18]]
 ; CHECK:       bb1.nonchr:
 ; CHECK-NEXT:    [[CMP2_NONCHR:%.*]] = icmp eq i64 [[I_FR]], 2
 ; CHECK-NEXT:    br i1 [[CMP2_NONCHR]], label [[BB3_NONCHR:%.*]], label [[BB2_NONCHR:%.*]], !prof [[PROF16]]
Index: llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
+++ llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
@@ -115,8 +115,8 @@
 ; CHECK-NEXT:    br label [[FOR_BODY4_US_1:%.*]]
 ; CHECK:       for.body4.us.1:
 ; CHECK-NEXT:    [[K_011_US_1:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ]
-; CHECK-NEXT:    [[NARROW:%.*]] = add nuw nsw i32 [[K_011_US_1]], 15
-; CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT:    [[CONV_US_1:%.*]] = zext i32 [[K_011_US_1]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[CONV_US_1]], 15
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp ult i32 [[K_011_US_1]], 210
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP9]])
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP8]]
@@ -138,8 +138,8 @@
 ; CHECK-NEXT:    br label [[FOR_BODY4_US_2:%.*]]
 ; CHECK:       for.body4.us.2:
 ; CHECK-NEXT:    [[K_011_US_2:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ]
-; CHECK-NEXT:    [[NARROW14:%.*]] = add nuw nsw i32 [[K_011_US_2]], 30
-; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[NARROW14]] to i64
+; CHECK-NEXT:    [[CONV_US_2:%.*]] = zext i32 [[K_011_US_2]] to i64
+; CHECK-NEXT:    [[TMP15:%.*]] = add nuw nsw i64 [[CONV_US_2]], 30
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult i32 [[K_011_US_2]], 195
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP16]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP15]]
@@ -161,8 +161,8 @@
 ; CHECK-NEXT:    br label [[FOR_BODY4_US_3:%.*]]
 ; CHECK:       for.body4.us.3:
 ; CHECK-NEXT:    [[K_011_US_3:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ]
-; CHECK-NEXT:    [[NARROW15:%.*]] = add nuw nsw i32 [[K_011_US_3]], 45
-; CHECK-NEXT:    [[TMP22:%.*]] = zext i32 [[NARROW15]] to i64
+; CHECK-NEXT:    [[CONV_US_3:%.*]] = zext i32 [[K_011_US_3]] to i64
+; CHECK-NEXT:    [[TMP22:%.*]] = add nuw nsw i64 [[CONV_US_3]], 45
 ; CHECK-NEXT:    [[TMP23:%.*]] = icmp ult i32 [[K_011_US_3]], 180
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP23]])
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP22]]