Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1117,7 +1117,7 @@ FPM.addPass(LoopLoadEliminationPass()); } // Cleanup after the loop optimization passes. - FPM.addPass(InstCombinePass()); + FPM.addPass(InstCombinePass(InstCombineOptions().setUseLoopInfo(true))); if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { ExtraVectorPassManager ExtraPasses; @@ -1129,7 +1129,8 @@ // dead (or speculatable) control flows or more combining opportunities. ExtraPasses.addPass(EarlyCSEPass()); ExtraPasses.addPass(CorrelatedValuePropagationPass()); - ExtraPasses.addPass(InstCombinePass()); + ExtraPasses.addPass( + InstCombinePass(InstCombineOptions().setUseLoopInfo(true))); LoopPassManager LPM; LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, /*AllowSpeculation=*/true)); @@ -1180,7 +1181,7 @@ FPM.addPass(VectorCombinePass()); if (!IsFullLTO) { - FPM.addPass(InstCombinePass()); + FPM.addPass(InstCombinePass(InstCombineOptions().setUseLoopInfo(true))); // Unroll small loops to hide loop backedge latency and saturate any // parallel execution resources of an out-of-order processor. We also then // need to clean up redundancies and loop invariant code. @@ -1203,7 +1204,7 @@ // or SimplifyCFG passes scheduled after us, that would cleanup // the CFG mess this may created if allowed to modify CFG, so forbid that. 
FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); - FPM.addPass(InstCombinePass()); + FPM.addPass(InstCombinePass(InstCombineOptions().setUseLoopInfo(true))); FPM.addPass( RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); FPM.addPass(createFunctionToLoopPassAdaptor( @@ -1217,7 +1218,7 @@ FPM.addPass(AlignmentFromAssumptionsPass()); if (IsFullLTO) - FPM.addPass(InstCombinePass()); + FPM.addPass(InstCombinePass(InstCombineOptions().setUseLoopInfo(true))); } ModulePassManager Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -4651,10 +4651,9 @@ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); auto &TTI = AM.getResult<TargetIRAnalysis>(F); - // TODO: Only use LoopInfo when the option is set. This requires that the - // callers in the pass pipeline explicitly set the option. - auto *LI = AM.getCachedResult<LoopAnalysis>(F); - if (!LI && Options.UseLoopInfo) + // Only use LoopInfo when the option is set by callers. 
+ LoopInfo *LI = nullptr; + if (Options.UseLoopInfo) LI = &AM.getResult<LoopAnalysis>(F); auto *AA = &AM.getResult<AAManager>(F); Index: llvm/test/Transforms/InstCombine/constant-fold-gep.ll =================================================================== --- llvm/test/Transforms/InstCombine/constant-fold-gep.ll +++ llvm/test/Transforms/InstCombine/constant-fold-gep.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes='require<loops>,instcombine' -S | FileCheck %s +; RUN: opt < %s -passes='instcombine<no-use-loop-info>' -S | FileCheck %s --check-prefixes=CHECK,NOLOOPINFO +; RUN: opt < %s -passes='instcombine<use-loop-info>' -S | FileCheck %s --check-prefixes=CHECK,LOOPINFO + target datalayout = "E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" ; Constant folding should fix notionally out-of-bounds indices @@ -158,13 +160,22 @@ } define ptr @gep_plus_addr_sub_self_in_loop() { -; CHECK-LABEL: @gep_plus_addr_sub_self_in_loop( -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[ADDR:%.*]] = call i64 @get.i64() -; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoint (ptr @g to i64))), i64 [[ADDR]] -; CHECK-NEXT: call void @use.ptr(ptr [[P2]]) -; CHECK-NEXT: br label [[LOOP]] +; NOLOOPINFO-LABEL: @gep_plus_addr_sub_self_in_loop( +; NOLOOPINFO-NEXT: br label [[LOOP:%.*]] +; NOLOOPINFO: loop: +; NOLOOPINFO-NEXT: [[ADDR:%.*]] = call i64 @get.i64() +; NOLOOPINFO-NEXT: [[P1:%.*]] = getelementptr i8, ptr @g, i64 [[ADDR]] +; NOLOOPINFO-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P1]], i64 sub (i64 0, i64 ptrtoint (ptr @g to i64)) +; NOLOOPINFO-NEXT: call void @use.ptr(ptr [[P2]]) +; NOLOOPINFO-NEXT: br label [[LOOP]] +; +; LOOPINFO-LABEL: @gep_plus_addr_sub_self_in_loop( +; LOOPINFO-NEXT: br label [[LOOP:%.*]] +; LOOPINFO: loop: +; LOOPINFO-NEXT: [[ADDR:%.*]] = call i64 @get.i64() +; LOOPINFO-NEXT: [[P2:%.*]] = getelementptr i8, ptr 
getelementptr (i8, ptr @g, i64 sub (i64 0, i64 ptrtoint (ptr @g to i64))), i64 [[ADDR]] +; LOOPINFO-NEXT: call void @use.ptr(ptr [[P2]]) +; LOOPINFO-NEXT: br label [[LOOP]] ; %p.int = ptrtoint ptr @g to i64 %p.int.neg = sub i64 0, %p.int Index: llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=loop-vectorize,instcombine,simplifycfg -simplifycfg-require-and-preserve-domtree=1 -tail-predication=enabled < %s -S -o - | FileCheck %s +; RUN: opt -passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -tail-predication=enabled < %s -S -o - | FileCheck %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-none-eabi" Index: llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll +++ llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mcpu=skx -S -passes=loop-vectorize,instcombine,simplifycfg -simplifycfg-require-and-preserve-domtree=1 -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED -; RUN: opt -mcpu=skx -S -passes=loop-vectorize,instcombine,simplifycfg -simplifycfg-require-and-preserve-domtree=1 -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED +; RUN: opt 
-mcpu=skx -S -passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED +; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" target triple = "i386-unknown-linux-gnu" Index: llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll +++ llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED -; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED +; RUN: opt -mcpu=skx -S -passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=DISABLED_MASKED_STRIDED +; RUN: opt -mcpu=skx -S 
-passes='loop-vectorize,instcombine<use-loop-info>,simplifycfg,loop-mssa(licm)' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s -check-prefix=ENABLED_MASKED_STRIDED target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s -; RUN: opt -opaque-pointers=0 < %s -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s --check-prefix=INTER +; RUN: opt -opaque-pointers=0 < %s -passes='loop-vectorize,instcombine<use-loop-info>' -force-vector-width=4 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s +; RUN: opt -opaque-pointers=0 < %s -passes='loop-vectorize,instcombine<use-loop-info>' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s --check-prefix=INTER target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" Index: llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ 
llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -opaque-pointers=0 -S -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s +; RUN: opt -opaque-pointers=0 -S -passes='loop-vectorize,instcombine<use-loop-info>' -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses=true -runtime-memory-check-threshold=24 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"