Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -580,6 +580,7 @@ // Specially optimize memory movement as it doesn't look like dataflow in SSA. FPM.addPass(MemCpyOptPass()); + FPM.addPass(TailCallElimPass()); FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor( Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -204,6 +204,7 @@ ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -175,6 +175,7 @@ ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -147,6 +147,7 @@ ; CHECK-O1-NEXT: Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -156,6 +156,7 @@ ; CHECK-O1-NEXT: Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -185,6 +185,7 @@ ; CHECK-O1-NEXT: Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass Index: llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -150,6 +150,7 @@ ; CHECK-O1-NEXT: Running pass: CoroElidePass ; CHECK-O-NEXT: Running pass: ADCEPass ; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O23SZ-NEXT: Running pass: DSEPass ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass Index: llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -291,7 +291,7 @@ define double @external_use_with_fast_math(double* %a, i64 %n) { ; AUTO_VEC-LABEL: @external_use_with_fast_math( ; AUTO_VEC-NEXT: entry: -; AUTO_VEC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) +; AUTO_VEC-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) ; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16 ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: @@ -451,7 +451,7 @@ define double @external_use_without_fast_math(double* %a, i64 %n) { ; AUTO_VEC-LABEL: @external_use_without_fast_math( ; AUTO_VEC-NEXT: entry: -; AUTO_VEC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) +; AUTO_VEC-NEXT: [[SMAX:%.*]] = tail call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) ; AUTO_VEC-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1 ; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[SMAX]], 7 ; AUTO_VEC-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 Index: llvm/test/Transforms/PhaseOrdering/X86/loop-idiom-vs-indvars.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/X86/loop-idiom-vs-indvars.ll +++ llvm/test/Transforms/PhaseOrdering/X86/loop-idiom-vs-indvars.ll @@ -12,7 +12,7 @@ ; ALL-LABEL: @cttz( ; ALL-NEXT: entry: ; ALL-NEXT: [[TMP0:%.*]] = shl i32 [[N:%.*]], 1 -; ALL-NEXT: [[TMP1:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false), [[RNG0:!range !.*]] +; ALL-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false), !range [[RNG0:![0-9]+]] ; ALL-NEXT: [[TMP2:%.*]] = sub nuw nsw i32 32, [[TMP1]] ; ALL-NEXT: [[TMP3:%.*]] = sub nuw nsw i32 75, [[TMP1]] ; ALL-NEXT: store i32 [[TMP3]], i32* [[P1:%.*]], align 4 Index: llvm/test/Transforms/PhaseOrdering/memset-tail.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/memset-tail.ll +++ llvm/test/Transforms/PhaseOrdering/memset-tail.ll @@ -8,7 +8,7 @@ ; CHECK-NEXT: br i1 [[CMP_NOT1]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[C]] to i64 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[D:%.*]], i8 0, i64 [[TMP0]], i1 false) +; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr align 1 [[D:%.*]], i8 0, i64 [[TMP0]], i1 false) ; CHECK-NEXT: br label [[WHILE_END]] ; CHECK: while.end: ; CHECK-NEXT: ret void Index: llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll +++ llvm/test/Transforms/PhaseOrdering/single-iteration-loop-sroa.ll @@ -58,7 +58,7 @@ define i16 @test(i16 %arg) { ; CHECK-LABEL: @test( ; CHECK-NEXT: bb6.i.i.i: -; CHECK-NEXT: [[DATA_I_SROA_0_0_INSERT_INSERT:%.*]] = call i16 @llvm.bswap.i16(i16 [[ARG:%.*]]) +; CHECK-NEXT: [[DATA_I_SROA_0_0_INSERT_INSERT:%.*]] = tail call i16 @llvm.bswap.i16(i16 [[ARG:%.*]]) ; CHECK-NEXT: ret i16 [[DATA_I_SROA_0_0_INSERT_INSERT]] ; %ret = call i16 @helper(i16 %arg, i64 1)