Index: llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -42,6 +42,11 @@
     cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
     cl::Hidden);
 
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+    EnableGlobalMerge("riscv-enable-global-merge", cl::Hidden,
+                      cl::desc("Enable the global merge pass"));
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -204,6 +209,22 @@
     // more details.
     addPass(createBarrierNoopPass());
   }
+
+  // Run GlobalMerge if it was explicitly enabled, or if the option was left
+  // unset and optimisation is on. In the unset case below Aggressive, pass
+  // OnlyOptimizeForSize so that only minsize functions are affected (see
+  // global-merge-minsize.ll). The MaxOffset limit is exercised by
+  // global-merge-offset.ll.
+  if ((TM->getOptLevel() != CodeGenOpt::None &&
+       EnableGlobalMerge == cl::BOU_UNSET) ||
+      EnableGlobalMerge == cl::BOU_TRUE) {
+    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
+                               (EnableGlobalMerge == cl::BOU_UNSET);
+
+    addPass(createGlobalMergePass(TM, /* MaxOffset */ 2047, OnlyOptimizeForSize,
+                                  /* MergeExternalByDefault */ true));
+  }
+
   return false;
 }
 
Index: llvm/test/CodeGen/RISCV/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -63,6 +63,7 @@
 ; CHECK-NEXT: Exception handling preparation
 ; CHECK-NEXT: A No-Op Barrier Pass
 ; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: Merge internal globals
 ; CHECK-NEXT: Safe Stack instrumentation pass
 ; CHECK-NEXT: Insert stack protectors
 ; CHECK-NEXT: Module Verifier
Index: llvm/test/CodeGen/RISCV/global-merge-minsize.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/global-merge-minsize.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+@eg1 = dso_local global i32 0, align 4
+@eg2 = dso_local global i32 0, align 4
+@eg3 = dso_local global i32 0, align 4
+@eg4 = dso_local global i32 0, align 4
+
+; Demonstrate that at the default optimisation level, global merging takes
+; place for globals referenced in minsize functions but not others.
+
+define void @f1(i32 %a) nounwind {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(eg1)
+; CHECK-NEXT: sw a0, %lo(eg1)(a1)
+; CHECK-NEXT: lui a1, %hi(eg2)
+; CHECK-NEXT: sw a0, %lo(eg2)(a1)
+; CHECK-NEXT: ret
+  store i32 %a, ptr @eg1, align 4
+  store i32 %a, ptr @eg2, align 4
+  ret void
+}
+
+; TODO: It would be better for code size to alter the first store below by
+; first fully materialising .L_MergedGlobals in a1 and then storing to it with
+; a 0 offset.
+
+define void @f2(i32 %a) nounwind minsize optsize {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.L_MergedGlobals)
+; CHECK-NEXT: sw a0, %lo(.L_MergedGlobals)(a1)
+; CHECK-NEXT: addi a1, a1, %lo(.L_MergedGlobals)
+; CHECK-NEXT: sw a0, 4(a1)
+; CHECK-NEXT: ret
+  store i32 %a, ptr @eg3, align 4
+  store i32 %a, ptr @eg4, align 4
+  ret void
+}
Index: llvm/test/CodeGen/RISCV/global-merge-offset.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/global-merge-offset.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/ArrSize/100/g' %s | llc -mtriple=riscv32 -riscv-enable-global-merge \
+; RUN: -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/ArrSize/100/g' %s | llc -mtriple=riscv64 -riscv-enable-global-merge \
+; RUN: -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/ArrSize/101/g' %s | llc -mtriple=riscv32 -riscv-enable-global-merge \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG
+; RUN: sed 's/ArrSize/101/g' %s | llc -mtriple=riscv64 -riscv-enable-global-merge \
+; RUN: -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-TOOBIG
+
+; This test demonstrates that the MaxOffset is set correctly for RISC-V by
+; constructing an input that is at the limit and comparing.
+
+@ga1 = dso_local global [410 x i32] zeroinitializer, align 4
+@ga2 = dso_local global [ArrSize x i32] zeroinitializer, align 4
+@gi = dso_local global i32 0, align 4
+
+; TODO: It would be better for codesize if the final store below was
+; `sw a0, 0(a2)`.
+
+define void @f1(i32 %a) nounwind {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.L_MergedGlobals)
+; CHECK-NEXT: addi a2, a1, %lo(.L_MergedGlobals)
+; CHECK-NEXT: sw a0, 2044(a2)
+; CHECK-NEXT: sw a0, 404(a2)
+; CHECK-NEXT: sw a0, %lo(.L_MergedGlobals)(a1)
+; CHECK-NEXT: ret
+;
+; CHECK-TOOBIG-LABEL: f1:
+; CHECK-TOOBIG: # %bb.0:
+; CHECK-TOOBIG-NEXT: lui a1, %hi(ga1+1640)
+; CHECK-TOOBIG-NEXT: lui a2, %hi(.L_MergedGlobals)
+; CHECK-TOOBIG-NEXT: addi a3, a2, %lo(.L_MergedGlobals)
+; CHECK-TOOBIG-NEXT: sw a0, %lo(ga1+1640)(a1)
+; CHECK-TOOBIG-NEXT: sw a0, 408(a3)
+; CHECK-TOOBIG-NEXT: sw a0, %lo(.L_MergedGlobals)(a2)
+; CHECK-TOOBIG-NEXT: ret
+  %ga1_end = getelementptr inbounds [410 x i32], ptr @ga1, i32 0, i64 410
+  %ga2_end = getelementptr inbounds [ArrSize x i32], ptr @ga2, i32 0, i64 ArrSize
+  store i32 %a, ptr %ga1_end, align 4
+  store i32 %a, ptr %ga2_end, align 4
+  store i32 %a, ptr @gi, align 4
+  ret void
+}
Index: llvm/test/CodeGen/RISCV/global-merge.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/global-merge.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+; RUN: llc -mtriple=riscv64 -riscv-enable-global-merge -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+@ig1 = internal global i32 0, align 4
+@ig2 = internal global i32 0, align 4
+
+@eg1 = dso_local global i32 0, align 4
+@eg2 = dso_local global i32 0, align 4
+
+; TODO: It would be better for code size to alter the first store below by
+; first fully materialising .L_MergedGlobals in a1 and then storing to it with
+; a 0 offset.
+
+define void @f1(i32 %a) nounwind {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a1, %hi(.L_MergedGlobals)
+; CHECK-NEXT: sw a0, %lo(.L_MergedGlobals)(a1)
+; CHECK-NEXT: addi a1, a1, %lo(.L_MergedGlobals)
+; CHECK-NEXT: sw a0, 4(a1)
+; CHECK-NEXT: sw a0, 8(a1)
+; CHECK-NEXT: sw a0, 12(a1)
+; CHECK-NEXT: ret
+  store i32 %a, ptr @ig1, align 4
+  store i32 %a, ptr @ig2, align 4
+  store i32 %a, ptr @eg1, align 4
+  store i32 %a, ptr @eg2, align 4
+  ret void
+}