diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -270,6 +270,7 @@ CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled. CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled. CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled. +CODEGENOPT(AlwaysMem2Reg , 1, 0) ///< Set when -falways-mem2reg is enabled. CODEGENOPT(UnwindTables , 1, 0) ///< Emit unwind tables. CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer. CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer. diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4788,6 +4788,10 @@ MarshallingInfoFlag>; def fuse_ctor_homing: Flag<["-"], "fuse-ctor-homing">, HelpText<"Use constructor homing if we are using limited debug info already">; +defm falways_mem2reg : BoolFOption<"always-mem2reg", + CodeGenOpts<"AlwaysMem2Reg">, DefaultFalse, + PosFlag, + NegFlag>; } def disable_llvm_verifier : Flag<["-"], "disable-llvm-verifier">, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -682,6 +682,7 @@ PMBuilder.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops; + PMBuilder.AlwaysMem2Reg = CodeGenOpts.AlwaysMem2Reg; // Loop interleaving in the loop vectorizer has historically been set to be // enabled when loop unrolling is enabled. 
PMBuilder.LoopsInterleaved = CodeGenOpts.UnrollLoops; @@ -1256,6 +1257,7 @@ PTO.LoopInterleaving = CodeGenOpts.UnrollLoops; PTO.LoopVectorization = CodeGenOpts.VectorizeLoop; PTO.SLPVectorization = CodeGenOpts.VectorizeSLP; + PTO.AlwaysMem2Reg = CodeGenOpts.AlwaysMem2Reg; PTO.MergeFunctions = CodeGenOpts.MergeFunctions; // Only enable CGProfilePass when using integrated assembler, since // non-integrated assemblers don't recognize .cgprofile section. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1764,6 +1764,8 @@ // starting with the default for this optimization level. bool ShouldAddOptNone = !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; + // -falways-mem2reg implies at least a minimal amount of optimization. + ShouldAddOptNone &= !CodeGenOpts.AlwaysMem2Reg; // We can't add optnone in the following cases, it won't pass the verifier. 
ShouldAddOptNone &= !D->hasAttr(); ShouldAddOptNone &= !D->hasAttr(); diff --git a/clang/test/CodeGen/falways-mem2reg.c b/clang/test/CodeGen/falways-mem2reg.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/falways-mem2reg.c @@ -0,0 +1,33 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64 -emit-llvm -o - -flegacy-pass-manager -O0 %s \ +// RUN: | FileCheck --check-prefix=O0-NO-MEM2REG %s +// RUN: %clang_cc1 -triple riscv64 -emit-llvm -o - -fno-legacy-pass-manager -O0 %s \ +// RUN: | FileCheck --check-prefix=O0-NO-MEM2REG %s +// RUN: %clang_cc1 -triple riscv64 -emit-llvm -o - -flegacy-pass-manager -O0 -fno-always-mem2reg %s \ +// RUN: | FileCheck --check-prefix=O0-NO-MEM2REG %s +// RUN: %clang_cc1 -triple riscv64 -emit-llvm -o - -fno-legacy-pass-manager -O0 -fno-always-mem2reg %s \ +// RUN: | FileCheck --check-prefix=O0-NO-MEM2REG %s +// RUN: %clang_cc1 -triple riscv64 -emit-llvm -o - -flegacy-pass-manager -O0 -falways-mem2reg %s \ +// RUN: | FileCheck --check-prefix=O0-MEM2REG %s +// RUN: %clang_cc1 -triple riscv64 -emit-llvm -o - -fno-legacy-pass-manager -O0 -falways-mem2reg %s \ +// RUN: | FileCheck --check-prefix=O0-MEM2REG %s + +// O0-NO-MEM2REG-LABEL: @add( +// O0-NO-MEM2REG-NEXT: entry: +// O0-NO-MEM2REG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// O0-NO-MEM2REG-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// O0-NO-MEM2REG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// O0-NO-MEM2REG-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 +// O0-NO-MEM2REG-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// O0-NO-MEM2REG-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// O0-NO-MEM2REG-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +// O0-NO-MEM2REG-NEXT: ret i32 [[ADD]] +// +// O0-MEM2REG-LABEL: @add( +// O0-MEM2REG-NEXT: entry: +// O0-MEM2REG-NEXT: [[ADD:%.*]] = add nsw i32 [[A:%.*]], [[B:%.*]] +// O0-MEM2REG-NEXT: ret i32 [[ADD]] +// +int add(int a, int 
b) { + return a + b; +} diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -107,6 +107,10 @@ /// is that of the flag: `-forget-scev-loop-unroll`. bool ForgetAllSCEVInLoopUnroll; + /// Tuning option to always run mem2reg regardless of the optimization level. + /// Its default value is false. + bool AlwaysMem2Reg; + /// Tuning option to enable/disable coroutine intrinsic lowering. Its default /// value is false. Frontends such as Clang may enable this conditionally. For /// example, Clang enables this option if the flags `-std=c++2a` or above, or diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -156,6 +156,7 @@ bool DisableTailCalls; bool DisableUnrollLoops; + bool AlwaysMem2Reg; bool CallGraphProfile; bool SLPVectorize; bool LoopVectorize; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -283,6 +283,7 @@ SLPVectorization = false; LoopUnrolling = true; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; + AlwaysMem2Reg = false; Coroutines = false; LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; @@ -1931,6 +1932,9 @@ MPM.addPass(AlwaysInlinerPass( /*InsertLifetimeIntrinsics=*/PTO.Coroutines)); + if (PTO.AlwaysMem2Reg) + MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); + if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -187,6 +187,7 @@ LibraryInfo 
= nullptr; Inliner = nullptr; DisableUnrollLoops = false; + AlwaysMem2Reg = false; SLPVectorize = false; LoopVectorize = true; LoopsInterleaved = true; @@ -658,8 +659,11 @@ MPM.add(createForceFunctionAttrsLegacyPass()); // If all optimizations are disabled, just run the always-inline pass and, - // if enabled, the function merging pass. + // if enabled, the mem2reg and function merging passes. if (OptLevel == 0) { + if (AlwaysMem2Reg) + MPM.add(createPromoteMemoryToRegisterPass()); + addPGOInstrPasses(MPM); if (Inliner) { MPM.add(Inliner);