diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1370,6 +1370,11 @@ // Optimize PHIs by speculating around them when profitable. Note that this // pass needs to be run after any PRE or similar pass as it is essentially // inserting redundancies into the program. This even includes SimplifyCFG. + // FIXME: Lots of backends run SimplifyCFG (and possibly similar passes) in + // the backend pipeline. So it would be nice to elaborate a bit more about + // "needs to be run after". Or maybe the SpeculateAroundPHIsPass should be + // moved to the backend pipeline to ensure that it is run after IR passes in + // the backend pipeline as well? OptimizePM.addPass(SpeculateAroundPHIsPass()); if (PTO.Coroutines) diff --git a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp --- a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp @@ -557,7 +557,7 @@ /// This routine does the actual speculation around a set of PHI nodes where we /// have determined this to be both safe and profitable. /// -/// This routine handles any spliting of critical edges necessary to create +/// This routine handles any splitting of critical edges necessary to create /// a safe block to speculate into as well as cloning the instructions and /// rewriting all uses. static void speculatePHIs(ArrayRef<PHINode *> SpecPNs, @@ -755,7 +755,7 @@ // For each PHI node in this block, check whether there are immediate folding // opportunities from speculation, and whether that speculation will be - // valid. This determise the set of safe PHIs to speculate. + // valid. This determines the set of safe PHIs to speculate. llvm::erase_if(PNs, [&](PHINode *PN) { return !isSafeAndProfitableToSpeculateAroundPHI( *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI); }); @@ -792,6 +792,22 @@ return false; } + // Avoid speculation around back-edges. The speculation would result in a + // critical edge split, in effect moving the latch to a new BB. In such + // situations we would need to transfer any loop metadata associated with the + // loop to the new latch, but support for that has not been implemented in + // this pass yet. + // + // This kind of speculation has also been identified as being bad for certain + // targets with HW-loops, as it changes the loop-form at the end of the + // optimization pipeline (maybe it would help if we also rotate the loop + // afterwards).
+ for (auto &Pred : PredSet) + if (DT.dominates(PNs[0]->getParent(), Pred)) { + LLVM_DEBUG(dbgs() << " Involves a back-edge\n"); + return false; + } + SmallVector<PHINode *, 16> SpecPNs = findProfitablePHIs( PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI); if (SpecPNs.empty()) diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll --- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -5,11 +5,11 @@ ; RUN: opt -O3 -rotation-max-header-size=1 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=HOIST ; RUN: opt -passes='default<O3>' -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefix=HOIST -; RUN: opt -O3 -rotation-max-header-size=2 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATED_LATER_OLDPM -; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefix=ROTATED_LATER_NEWPM +; RUN: opt -O3 -rotation-max-header-size=2 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATED_LATER +; RUN: opt -passes='default<O3>' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefix=ROTATED_LATER -; RUN: opt -O3 -rotation-max-header-size=3 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATE_OLDPM -; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefix=ROTATE_NEWPM +; RUN: opt -O3 -rotation-max-header-size=3 -S -enable-new-pm=0 < %s | FileCheck %s --check-prefix=ROTATE +; RUN: opt -passes='default<O3>' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefix=ROTATE ; This example is produced from a very basic C code: ; @@ -71,105 +71,51 @@ ; HOIST: return: ; HOIST-NEXT: ret void ; -; ROTATED_LATER_OLDPM-LABEL: @_Z4loopi( -; ROTATED_LATER_OLDPM-NEXT: entry: -; ROTATED_LATER_OLDPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 -; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] -; ROTATED_LATER_OLDPM: for.cond.preheader: -; ROTATED_LATER_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0 -; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] -; ROTATED_LATER_OLDPM: for.cond.cleanup: -; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() -; ROTATED_LATER_OLDPM-NEXT: tail call void @f2() -; ROTATED_LATER_OLDPM-NEXT: br label [[RETURN]] -; ROTATED_LATER_OLDPM: for.body: -; ROTATED_LATER_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() -; ROTATED_LATER_OLDPM-NEXT: tail call void @f1() -; ROTATED_LATER_OLDPM-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 -; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] -; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] -; ROTATED_LATER_OLDPM: return: -; ROTATED_LATER_OLDPM-NEXT: ret void +; ROTATED_LATER-LABEL: @_Z4loopi( +; ROTATED_LATER-NEXT: entry: +; ROTATED_LATER-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 +; ROTATED_LATER-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] +; ROTATED_LATER: for.cond.preheader: +; ROTATED_LATER-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 +; ROTATED_LATER-NEXT: [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0 +;
ROTATED_LATER-NEXT: br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] +; ROTATED_LATER: for.cond.cleanup: +; ROTATED_LATER-NEXT: tail call void @f0() +; ROTATED_LATER-NEXT: tail call void @f2() +; ROTATED_LATER-NEXT: br label [[RETURN]] +; ROTATED_LATER: for.body: +; ROTATED_LATER-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] +; ROTATED_LATER-NEXT: tail call void @f0() +; ROTATED_LATER-NEXT: tail call void @f1() +; ROTATED_LATER-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 +; ROTATED_LATER-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATED_LATER-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; ROTATED_LATER: return: +; ROTATED_LATER-NEXT: ret void ; -; ROTATED_LATER_NEWPM-LABEL: @_Z4loopi( -; ROTATED_LATER_NEWPM-NEXT: entry: -; ROTATED_LATER_NEWPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 -; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] -; ROTATED_LATER_NEWPM: for.cond.preheader: -; ROTATED_LATER_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0 -; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE:%.*]] -; ROTATED_LATER_NEWPM: for.cond.preheader.for.body_crit_edge: -; ROTATED_LATER_NEWPM-NEXT: [[INC_1:%.*]] = add nuw i32 0, 1 -; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY:%.*]] -; ROTATED_LATER_NEWPM: for.cond.cleanup: -; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() -; ROTATED_LATER_NEWPM-NEXT: tail call void @f2() -; ROTATED_LATER_NEWPM-NEXT: br label [[RETURN]] -; ROTATED_LATER_NEWPM: for.body: -; ROTATED_LATER_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE]] ] -; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() -; ROTATED_LATER_NEWPM-NEXT: tail call void @f1() -; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] -; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] -; ROTATED_LATER_NEWPM: for.body.for.body_crit_edge: -; ROTATED_LATER_NEWPM-NEXT: [[INC_0]] = add nuw i32 [[INC_PHI]], 1 -; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY]] -; ROTATED_LATER_NEWPM: return: -; ROTATED_LATER_NEWPM-NEXT: ret void -; -; ROTATE_OLDPM-LABEL: @_Z4loopi( -; ROTATE_OLDPM-NEXT: entry: -; ROTATE_OLDPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 -; ROTATE_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] -; ROTATE_OLDPM: for.cond.preheader: -; ROTATE_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; ROTATE_OLDPM: for.body.preheader: -; ROTATE_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATE_OLDPM-NEXT: br label [[FOR_BODY:%.*]] -; ROTATE_OLDPM: for.cond.cleanup: -; ROTATE_OLDPM-NEXT: tail call void @f0() -; ROTATE_OLDPM-NEXT: tail call void @f2() -; ROTATE_OLDPM-NEXT: br label [[RETURN]] -; ROTATE_OLDPM: for.body: -; ROTATE_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; ROTATE_OLDPM-NEXT: tail call void @f0() -; ROTATE_OLDPM-NEXT: tail call void @f1() -; ROTATE_OLDPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATE_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq 
i32 [[INC]], [[TMP0]] -; ROTATE_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] -; ROTATE_OLDPM: return: -; ROTATE_OLDPM-NEXT: ret void -; -; ROTATE_NEWPM-LABEL: @_Z4loopi( -; ROTATE_NEWPM-NEXT: entry: -; ROTATE_NEWPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 -; ROTATE_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] -; ROTATE_NEWPM: for.cond.preheader: -; ROTATE_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; ROTATE_NEWPM: for.body.preheader: -; ROTATE_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATE_NEWPM-NEXT: [[INC_1:%.*]] = add nuw nsw i32 0, 1 -; ROTATE_NEWPM-NEXT: br label [[FOR_BODY:%.*]] -; ROTATE_NEWPM: for.cond.cleanup: -; ROTATE_NEWPM-NEXT: tail call void @f0() -; ROTATE_NEWPM-NEXT: tail call void @f2() -; ROTATE_NEWPM-NEXT: br label [[RETURN]] -; ROTATE_NEWPM: for.body: -; ROTATE_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ] -; ROTATE_NEWPM-NEXT: tail call void @f0() -; ROTATE_NEWPM-NEXT: tail call void @f1() -; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] -; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] -; ROTATE_NEWPM: for.body.for.body_crit_edge: -; ROTATE_NEWPM-NEXT: [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1 -; ROTATE_NEWPM-NEXT: br label [[FOR_BODY]] -; ROTATE_NEWPM: return: -; ROTATE_NEWPM-NEXT: ret void +; ROTATE-LABEL: @_Z4loopi( +; ROTATE-NEXT: entry: +; ROTATE-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 +; ROTATE-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] +; ROTATE: for.cond.preheader: +; ROTATE-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 +; ROTATE-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; ROTATE: for.body.preheader: +; ROTATE-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 +; ROTATE-NEXT: br label [[FOR_BODY:%.*]] +; ROTATE: for.cond.cleanup: +; ROTATE-NEXT: tail call void @f0() +; ROTATE-NEXT: tail call void @f2() +; ROTATE-NEXT: br label [[RETURN]] +; ROTATE: for.body: +; ROTATE-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; ROTATE-NEXT: tail call void @f0() +; ROTATE-NEXT: tail call void @f1() +; ROTATE-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 +; ROTATE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] +; ROTATE: return: +; ROTATE-NEXT: ret void ; entry: %width.addr = alloca i32, align 4 diff --git a/llvm/test/Transforms/PhaseOrdering/reassociate-after-unroll.ll b/llvm/test/Transforms/PhaseOrdering/reassociate-after-unroll.ll --- a/llvm/test/Transforms/PhaseOrdering/reassociate-after-unroll.ll +++ b/llvm/test/Transforms/PhaseOrdering/reassociate-after-unroll.ll @@ -1,108 +1,58 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; REQUIRES: powerpc-registered-target -; RUN: opt -O2 -enable-new-pm=0 -S < %s | FileCheck %s --check-prefix=OLDPM -; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s --check-prefix=NEWPM +; RUN: opt -O2 -enable-new-pm=0 -S < %s | FileCheck %s --check-prefix=CHECK +; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s --check-prefix=CHECK target datalayout = "e-m:e-i64:64-n32:64" target triple
= "powerpc64le-unknown-linux-gnu" define dso_local i64 @func(i64 %blah, i64 %limit) #0 { -; OLDPM-LABEL: @func( -; OLDPM-NEXT: entry: -; OLDPM-NEXT: [[CMP4:%.*]] = icmp eq i64 [[LIMIT:%.*]], 0 -; OLDPM-NEXT: br i1 [[CMP4]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]] -; OLDPM: for.body.lr.ph: -; OLDPM-NEXT: [[CONV:%.*]] = and i64 [[BLAH:%.*]], 4294967295 -; OLDPM-NEXT: [[TMP0:%.*]] = add i64 [[LIMIT]], -1 -; OLDPM-NEXT: [[XTRAITER:%.*]] = and i64 [[LIMIT]], 7 -; OLDPM-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; OLDPM-NEXT: br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]] -; OLDPM: for.body.lr.ph.new: -; OLDPM-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[LIMIT]], -8 -; OLDPM-NEXT: br label [[FOR_BODY:%.*]] -; OLDPM: for.cond.cleanup.loopexit.unr-lcssa: -; OLDPM-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ] -; OLDPM-NEXT: [[K_05_UNR:%.*]] = phi i64 [ 1, [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ] -; OLDPM-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 -; OLDPM-NEXT: br i1 [[LCMP_MOD]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL:%.*]] -; OLDPM: for.body.epil: -; OLDPM-NEXT: [[G_06_EPIL:%.*]] = phi i64 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] -; OLDPM-NEXT: [[K_05_EPIL:%.*]] = phi i64 [ [[AND_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[K_05_UNR]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] -; OLDPM-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ], [ [[XTRAITER]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] -; OLDPM-NEXT: [[AND_EPIL]] = and i64 [[CONV]], [[K_05_EPIL]] -; OLDPM-NEXT: [[ADD_EPIL]] = add i64 [[AND_EPIL]], [[G_06_EPIL]] -; OLDPM-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 -; OLDPM-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 -; OLDPM-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL]], !llvm.loop !0 -; OLDPM: for.cond.cleanup: -; OLDPM-NEXT: [[G_0_LCSSA:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ] -; OLDPM-NEXT: ret i64 [[G_0_LCSSA]] -; OLDPM: for.body: -; OLDPM-NEXT: [[G_06:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_7]], [[FOR_BODY]] ] -; OLDPM-NEXT: [[K_05:%.*]] = phi i64 [ 1, [[FOR_BODY_LR_PH_NEW]] ], [ [[AND]], [[FOR_BODY]] ] -; OLDPM-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY]] ] -; OLDPM-NEXT: [[AND]] = and i64 [[CONV]], [[K_05]] -; OLDPM-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw i64 [[AND]], 1 -; OLDPM-NEXT: [[ADD_1:%.*]] = add i64 [[G_06]], [[REASS_ADD]] -; OLDPM-NEXT: [[REASS_ADD9:%.*]] = shl nuw nsw i64 [[AND]], 1 -; OLDPM-NEXT: [[ADD_3:%.*]] = add i64 [[ADD_1]], [[REASS_ADD9]] -; OLDPM-NEXT: [[REASS_ADD10:%.*]] = shl nuw nsw i64 [[AND]], 1 -; OLDPM-NEXT: [[ADD_5:%.*]] = add i64 [[ADD_3]], [[REASS_ADD10]] -; OLDPM-NEXT: [[REASS_ADD11:%.*]] = shl nuw nsw i64 [[AND]], 1 -; OLDPM-NEXT: [[ADD_7]] = add i64 [[ADD_5]], [[REASS_ADD11]] -; OLDPM-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 -; OLDPM-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 -; OLDPM-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]] -; -; NEWPM-LABEL: @func( -; NEWPM-NEXT: entry: -; NEWPM-NEXT: [[CMP4:%.*]] = icmp eq i64 [[LIMIT:%.*]], 0 -; NEWPM-NEXT: br i1 [[CMP4]], label [[FOR_COND_CLEANUP:%.*]], 
label [[FOR_BODY_LR_PH:%.*]] -; NEWPM: for.body.lr.ph: -; NEWPM-NEXT: [[CONV:%.*]] = and i64 [[BLAH:%.*]], 4294967295 -; NEWPM-NEXT: [[TMP0:%.*]] = add i64 [[LIMIT]], -1 -; NEWPM-NEXT: [[XTRAITER:%.*]] = and i64 [[LIMIT]], 7 -; NEWPM-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; NEWPM-NEXT: br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]] -; NEWPM: for.body.lr.ph.new: -; NEWPM-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[LIMIT]], -8 -; NEWPM-NEXT: [[AND_0:%.*]] = and i64 [[CONV]], 1 -; NEWPM-NEXT: br label [[FOR_BODY:%.*]] -; NEWPM: for.cond.cleanup.loopexit.unr-lcssa: -; NEWPM-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ] -; NEWPM-NEXT: [[K_05_UNR:%.*]] = phi i64 [ 1, [[FOR_BODY_LR_PH]] ], [ [[AND_PHI:%.*]], [[FOR_BODY]] ] -; NEWPM-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 -; NEWPM-NEXT: br i1 [[LCMP_MOD]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL:%.*]] -; NEWPM: for.body.epil: -; NEWPM-NEXT: [[G_06_EPIL:%.*]] = phi i64 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] -; NEWPM-NEXT: [[K_05_EPIL:%.*]] = phi i64 [ [[AND_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[K_05_UNR]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] -; NEWPM-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ], [ [[XTRAITER]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] -; NEWPM-NEXT: [[AND_EPIL]] = and i64 [[CONV]], [[K_05_EPIL]] -; NEWPM-NEXT: [[ADD_EPIL]] = add i64 [[AND_EPIL]], [[G_06_EPIL]] -; NEWPM-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 -; NEWPM-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 -; NEWPM-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL]], !llvm.loop !0 -; NEWPM: for.cond.cleanup: -; NEWPM-NEXT: [[G_0_LCSSA:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ] -; NEWPM-NEXT: ret i64 [[G_0_LCSSA]] -; NEWPM: for.body: -; NEWPM-NEXT: [[G_06:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_7]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ] -; NEWPM-NEXT: [[AND_PHI]] = phi i64 [ [[AND_0]], [[FOR_BODY_LR_PH_NEW]] ], [ [[AND_1:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ] -; NEWPM-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ] -; NEWPM-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw i64 [[AND_PHI]], 1 -; NEWPM-NEXT: [[ADD_1:%.*]] = add i64 [[G_06]], [[REASS_ADD]] -; NEWPM-NEXT: [[REASS_ADD9:%.*]] = shl nuw nsw i64 [[AND_PHI]], 1 -; NEWPM-NEXT: [[ADD_3:%.*]] = add i64 [[ADD_1]], [[REASS_ADD9]] -; NEWPM-NEXT: [[REASS_ADD10:%.*]] = shl nuw nsw i64 [[AND_PHI]], 1 -; NEWPM-NEXT: [[ADD_5:%.*]] = add i64 [[ADD_3]], [[REASS_ADD10]] -; NEWPM-NEXT: [[REASS_ADD11:%.*]] = shl nuw nsw i64 [[AND_PHI]], 1 -; NEWPM-NEXT: [[ADD_7]] = add i64 [[ADD_5]], [[REASS_ADD11]] -; NEWPM-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 -; NEWPM-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 -; NEWPM-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] -; NEWPM: for.body.for.body_crit_edge: -; NEWPM-NEXT: [[AND_1]] = and i64 [[CONV]], [[AND_PHI]] -; NEWPM-NEXT: br label [[FOR_BODY]] +; CHECK-LABEL: @func( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP4_NOT:%.*]] = icmp eq i64 [[LIMIT:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP4_NOT]], label [[FOR_COND_CLEANUP:%.*]], label 
[[FOR_BODY_LR_PH:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[CONV:%.*]] = and i64 [[BLAH:%.*]], 4294967295 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LIMIT]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[LIMIT]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 +; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_LR_PH_NEW:%.*]] +; CHECK: for.body.lr.ph.new: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[LIMIT]], -8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit.unr-lcssa: +; CHECK-NEXT: [[G_06_UNR:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH]] ], [ [[ADD_7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[K_05_UNR:%.*]] = phi i64 [ 1, [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL:%.*]] +; CHECK: for.body.epil: +; CHECK-NEXT: [[G_06_EPIL:%.*]] = phi i64 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[G_06_UNR]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[K_05_EPIL:%.*]] = phi i64 [ [[AND_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[K_05_UNR]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ], [ [[XTRAITER]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] +; CHECK-NEXT: [[AND_EPIL]] = and i64 [[CONV]], [[K_05_EPIL]] +; CHECK-NEXT: [[ADD_EPIL]] = add i64 [[AND_EPIL]], [[G_06_EPIL]] +; CHECK-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 +; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_EPIL]], [[LOOP0:!llvm.loop !.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[G_0_LCSSA:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[G_06_UNR]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ] +; CHECK-NEXT: ret i64 [[G_0_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: [[G_06:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_7]], [[FOR_BODY]] ] +; CHECK-NEXT: [[K_05:%.*]] = phi i64 [ 1, [[FOR_BODY_LR_PH_NEW]] ], [ [[AND]], [[FOR_BODY]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[AND]] = and i64 [[CONV]], [[K_05]] +; CHECK-NEXT: [[REASS_ADD:%.*]] = shl nuw nsw i64 [[AND]], 1 +; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[G_06]], [[REASS_ADD]] +; CHECK-NEXT: [[REASS_ADD9:%.*]] = shl nuw nsw i64 [[AND]], 1 +; CHECK-NEXT: [[ADD_3:%.*]] = add i64 [[ADD_1]], [[REASS_ADD9]] +; CHECK-NEXT: [[REASS_ADD10:%.*]] = shl nuw nsw i64 [[AND]], 1 +; CHECK-NEXT: [[ADD_5:%.*]] = add i64 [[ADD_3]], [[REASS_ADD10]] +; CHECK-NEXT: [[REASS_ADD11:%.*]] = shl nuw nsw i64 [[AND]], 1 +; CHECK-NEXT: [[ADD_7]] = add i64 [[ADD_5]], [[REASS_ADD11]] +; CHECK-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 +; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 +; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]] ; entry: %blah.addr = alloca i64, align 8 diff --git a/llvm/test/Transforms/SpeculateAroundPHIs/loop-latch.ll b/llvm/test/Transforms/SpeculateAroundPHIs/loop-latch.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SpeculateAroundPHIs/loop-latch.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes="spec-phis,print<loops>" %s 2>&1 | FileCheck %s +
+; This check aims at verifying that loop metadata isn't discarded and that it +; is placed on the latch (detected by the print pass). +; CHECK: Parallel Loop at depth 1 containing: + +; Also verify that we do not split a critical back-edge. If we do, we'd need +; to transfer the loop metadata to the new latch (or discard it). We also need +; to evaluate if such speculation is beneficial (at least downstream targets +; have seen problems with missing HW-loops when letting the pass speculate +; around back-edges). +define void @hello(i16 %n) { +; CHECK-LABEL: @hello( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i16 [[N:%.*]], 8 +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_02:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ADD]] = add nuw nsw i16 [[I_02]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[ADD]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], [[LOOP0:!llvm.loop !.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %cmp1 = icmp slt i16 %n, 8 + br i1 %cmp1, label %for.body, label %for.end + +for.body: + %i.02 = phi i16 [ %add, %for.body ], [ 0, %entry ] + %add = add nuw nsw i16 %i.02, 1 + %cmp = icmp slt i16 %add, %n + br i1 %cmp, label %for.body, label %for.end, !llvm.loop !0 + +for.end: + ret void +} + +!0 = !{!0, !1} +!1 = !{!"llvm.loop.vectorize.width", i32 1}