diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll --- a/clang/test/CodeGen/thinlto-distributed-newpm.ll +++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll @@ -165,7 +165,6 @@ ; CHECK-O: Running pass: InstSimplifyPass on main ; CHECK-O: Running pass: DivRemPairsPass on main ; CHECK-O: Running pass: SimplifyCFGPass on main -; CHECK-O: Running pass: SpeculateAroundPHIsPass on main ; CHECK-O: Running pass: CGProfilePass ; CHECK-O: Running pass: GlobalDCEPass ; CHECK-O: Running pass: ConstantMergePass diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1448,11 +1448,6 @@ // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. OptimizePM.addPass(SimplifyCFGPass()); - // Optimize PHIs by speculating around them when profitable. Note that this - // pass needs to be run after any PRE or similar pass as it is essentially - // inserting redundancies into the program. This even includes SimplifyCFG. - OptimizePM.addPass(SpeculateAroundPHIsPass()); - if (PTO.Coroutines) OptimizePM.addPass(CoroCleanupPass()); diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -237,7 +237,6 @@ ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass ; CHECK-EP-OPTIMIZER-LAST: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -218,7 +218,6 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-POSTLINK-O-NEXT: Running pass: SpeculateAroundPHIsPass ; CHECK-POSTLINK-O-NEXT: Running pass: CGProfilePass ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-POSTLINK-O-NEXT: Running pass: ConstantMergePass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -187,7 +187,6 @@ ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -199,7 +199,6 @@ ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll --- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll @@ -97,12 +97,9 @@ ; CHECK-GENERIC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 -; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[ENTRY_FOR_BODY6_CRIT_EDGE:%.*]] -; CHECK-GENERIC: entry.for.body6_crit_edge: -; CHECK-GENERIC-NEXT: [[INC_1:%.*]] = add nuw i32 0, 1 -; CHECK-GENERIC-NEXT: br label [[FOR_BODY6:%.*]] +; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]] ; CHECK-GENERIC: for.body6: -; CHECK-GENERIC-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY6_FOR_BODY6_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[ENTRY_FOR_BODY6_CRIT_EDGE]] ] +; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY6]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-GENERIC-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 ; CHECK-GENERIC-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-GENERIC-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 @@ -111,11 +108,9 @@ ; CHECK-GENERIC-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 ; CHECK-GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP2]] ; CHECK-GENERIC-NEXT: store i32 [[ADD21]], i32* [[ARRAYIDX20]], align 4 -; CHECK-GENERIC-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC_PHI]], [[ARG_0]] -; CHECK-GENERIC-NEXT: br i1 [[CMP5]], label [[FOR_BODY6_FOR_BODY6_CRIT_EDGE]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK-GENERIC: for.body6.for.body6_crit_edge: -; CHECK-GENERIC-NEXT: [[INC_0]] = add nuw i32 [[INC_PHI]], 1 -; CHECK-GENERIC-NEXT: br label [[FOR_BODY6]] +; CHECK-GENERIC-NEXT: [[INC]] = add nuw i32 [[K_03]], 1 +; CHECK-GENERIC-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC]], [[ARG_0]] +; CHECK-GENERIC-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-GENERIC: for.end: ; CHECK-GENERIC-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll @@ -156,36 +156,27 @@ ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 0 -; CHECK-NEXT: [[DOT017:%.*]] = getelementptr inbounds float, float* [[A]], i64 0 -; CHECK-NEXT: [[DOT018:%.*]] = getelementptr inbounds float, float* [[B]], i64 0 -; CHECK-NEXT: [[INDEX_NEXT_0:%.*]] = add nuw i64 0, 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX_NEXT_PHI:%.*]] = phi i64 [ [[INDEX_NEXT_0]], [[VECTOR_PH]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE:%.*]] ] -; CHECK-NEXT: [[DOTPHI:%.*]] = phi float* [ [[DOT018]], [[VECTOR_PH]] ], [ [[DOT120:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]] ] -; CHECK-NEXT: [[DOTPHI21:%.*]] = phi float* [ [[DOT017]], [[VECTOR_PH]] ], [ [[DOT119:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]] ] -; CHECK-NEXT: [[DOTPHI22:%.*]] = phi i32* [ [[DOT0]], [[VECTOR_PH]] ], [ [[DOT1:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTPHI22]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4, !alias.scope !8 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[DOTPHI21]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4, !alias.scope !11 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[DOTPHI]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !13, !noalias !15 -; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP5]], [[WIDE_LOAD15]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP5]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[DOTPHI]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[PREDPHI]], <4 x float>* [[TMP8]], align 4, !alias.scope !13, !noalias !15 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT_PHI]], 10000 -; CHECK-NEXT: br i1 [[TMP9]], label [[EXIT:%.*]], label [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]], !llvm.loop [[LOOP16:![0-9]+]] -; CHECK: vector.body.vector.body_crit_edge: -; CHECK-NEXT: [[DOT1]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX_NEXT_PHI]] -; CHECK-NEXT: [[DOT119]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT_PHI]] -; CHECK-NEXT: [[DOT120]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX_NEXT_PHI]] -; CHECK-NEXT: [[INDEX_NEXT_1]] = add nuw i64 [[INDEX_NEXT_PHI]], 4 -; CHECK-NEXT: br label [[VECTOR_BODY]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !8 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !13, !noalias !15 +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP7]], <4 x float> [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP8]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[PREDPHI]], <4 x float>* [[TMP11]], align 4, !alias.scope !13, !noalias !15 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 +; CHECK-NEXT: br i1 [[TMP12]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: loop.body: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[C_GEP:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[IV1]] diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll --- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -104,21 +104,18 @@ ; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATED_LATER_NEWPM: for.body.preheader: ; ROTATED_LATER_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_NEWPM-NEXT: [[INC_1:%.*]] = add nuw nsw i32 0, 1 ; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATED_LATER_NEWPM: for.cond.cleanup: ; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f2() ; ROTATED_LATER_NEWPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_NEWPM: for.body: -; ROTATED_LATER_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ] +; ROTATED_LATER_NEWPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f1() -; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] -; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] -; ROTATED_LATER_NEWPM: for.body.for.body_crit_edge: -; ROTATED_LATER_NEWPM-NEXT: [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1 -; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY]] +; ROTATED_LATER_NEWPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 +; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATED_LATER_NEWPM: return: ; ROTATED_LATER_NEWPM-NEXT: ret void ; @@ -155,21 +152,18 @@ ; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATE_NEWPM: for.body.preheader: ; ROTATE_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATE_NEWPM-NEXT: [[INC_1:%.*]] = add nuw nsw i32 0, 1 ; ROTATE_NEWPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATE_NEWPM: for.cond.cleanup: ; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: tail call void @f2() ; ROTATE_NEWPM-NEXT: br label [[RETURN]] ; ROTATE_NEWPM: for.body: -; ROTATE_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ] +; ROTATE_NEWPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: tail call void @f1() -; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] -; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] -; ROTATE_NEWPM: for.body.for.body_crit_edge: -; ROTATE_NEWPM-NEXT: [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1 -; ROTATE_NEWPM-NEXT: br label [[FOR_BODY]] +; ROTATE_NEWPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 +; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATE_NEWPM: return: ; ROTATE_NEWPM-NEXT: ret void ;