diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -27,6 +27,14 @@
 
 using namespace llvm;
 
+// Hidden, off-by-default switch: when set, the first SP adjustment is capped
+// so that stack accesses issued right after it can use compressed encodings.
+static cl::opt<bool>
+    CompressStackInst("riscv-compress-stack-inst", cl::Hidden,
+                      cl::desc("Compress stack related instructions"
+                               " by splitting or reordering stack layout"),
+                      cl::init(false));
+
 static const Register AllPopRegs[] = {
     RISCV::X1,  RISCV::X8,  RISCV::X9,  RISCV::X18, RISCV::X19, RISCV::X20,
     RISCV::X21, RISCV::X22, RISCV::X23, RISCV::X24,
@@ -1298,6 +1306,20 @@
   if (RVFI->getLibCallStackSize() || RVFI->getRVPushStackSize())
     return 0;
 
+  // Make stack loads/stores easier to compress: with the C extension enabled
+  // and at least one callee-saved register to spill, cap the first SP
+  // adjustment at the reach of the compressed SP-relative load/store offset.
+  // NOTE(review): Zca-only targets also provide c.lwsp/c.ldsp; consider
+  // STI.hasStdExtCOrZca() here -- confirm before changing.
+  if (CompressStackInst && STI.hasStdExtC() && CSI.size() > 0) {
+    // c.lwsp rd, offset[7:2](x2) => 2^(6 + 2)
+    if (STI.getXLen() == 32 && StackSize > 256)
+      return 256;
+    // c.ldsp rd, offset[8:3](x2) => 2^(6 + 3)
+    if (STI.getXLen() == 64 && StackSize > 512)
+      return 512;
+  }
+
   // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
   // 12-bit and there exists a callee-saved register needing to be pushed.
   if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
diff --git a/llvm/test/CodeGen/RISCV/compress-stack.mir b/llvm/test/CodeGen/RISCV/compress-stack.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/compress-stack.mir
@@ -0,0 +1,145 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -march=riscv32 -x mir -run-pass=prologepilog -verify-machineinstrs < %s \
+# RUN: | FileCheck -check-prefixes=CHECK-RV32-NO-STACK-OPT %s
+# RUN: llc -march=riscv32 -mattr=+c -x mir -run-pass=prologepilog -riscv-compress-stack-inst \
+# RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-RV32-STACK-OPT %s
+# RUN: llc -march=riscv64 -x mir -run-pass=prologepilog -verify-machineinstrs < %s \
+# RUN: | FileCheck -check-prefixes=CHECK-RV64-NO-STACK-OPT %s
+# RUN: llc -march=riscv64 -mattr=+c -x mir -run-pass=prologepilog -riscv-compress-stack-inst \
+# RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-RV64-STACK-OPT %s
+
+--- |
+
+  define dso_local void @_Z6callerv() {
+    %1 = alloca [1000 x i32], align 4
+    %2 = alloca i32, align 4
+    store i32 0, ptr %2, align 4
+    br label %3
+
+  3: ; preds = %11, %0
+    %4 = load i32, ptr %2, align 4
+    %5 = icmp slt i32 %4, 1000
+    br i1 %5, label %6, label %14
+
+  6: ; preds = %3
+    %7 = load i32, ptr %2, align 4
+    %8 = load i32, ptr %2, align 4
+    %9 = sext i32 %8 to i64
+    %10 = getelementptr inbounds [1000 x i32], ptr %1, i64 0, i64 %9
+    store i32 %7, ptr %10, align 4
+    br label %11
+
+  11: ; preds = %6
+    %12 = load i32, ptr %2, align 4
+    %13 = add nsw i32 %12, 1
+    store i32 %13, ptr %2, align 4
+    br label %3
+
+  14: ; preds = %3
+    %15 = getelementptr inbounds [1000 x i32], ptr %1, i64 0, i64 0
+    call void @_Z6calleePi(ptr noundef %15)
+    ret void
+  }
+
+  declare dso_local void @_Z6calleePi(ptr noundef)
+
+...
+---
+name: _Z6callerv
+alignment: 2
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 8
+  hasCalls: true
+  localFrameSize: 4004
+stack:
+  - { id: 0, size: 4000, alignment: 4, local-offset: -4000 }
+  - { id: 1, size: 4, alignment: 4, local-offset: -4004 }
+  - { id: 2, type: spill-slot, size: 8, alignment: 8 }
+machineFunctionInfo:
+  varArgsFrameIndex: 0
+  varArgsSaveSize: 0
+body: |
+  ; CHECK-RV32-NO-STACK-OPT-LABEL: name: _Z6callerv
+  ; CHECK-RV32-NO-STACK-OPT: $x2 = frame-setup ADDI $x2, -2032
+  ; CHECK-RV32-NO-STACK-OPT-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+  ; CHECK-RV32-NO-STACK-OPT-NEXT: SW killed $x1, $x2, 2028 :: (store (s32) into %stack.3)
+  ; CHECK-RV32-NO-STACK-OPT-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+  ; CHECK-RV32-NO-STACK-OPT-NEXT: $x2 = frame-setup ADDI $x2, -2000
+  ; CHECK-RV32-NO-STACK-OPT: $x2 = frame-destroy ADDI $x2, 2000
+  ; CHECK-RV32-NO-STACK-OPT-NEXT: $x1 = LW $x2, 2028 :: (load (s32) from %stack.3)
+  ; CHECK-RV32-NO-STACK-OPT-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+  ;
+  ; CHECK-RV32-STACK-OPT-LABEL: name: _Z6callerv
+  ; CHECK-RV32-STACK-OPT: $x2 = frame-setup ADDI $x2, -256
+  ; CHECK-RV32-STACK-OPT-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 256
+  ; CHECK-RV32-STACK-OPT-NEXT: SW killed $x1, $x2, 252 :: (store (s32) into %stack.3)
+  ; CHECK-RV32-STACK-OPT-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+  ; CHECK-RV32-STACK-OPT-NEXT: $x2 = frame-setup ADDI $x2, -2048
+  ; CHECK-RV32-STACK-OPT-NEXT: $x2 = frame-setup ADDI killed $x2, -1728
+  ; CHECK-RV32-STACK-OPT: $x2 = frame-destroy ADDI $x2, 2032
+  ; CHECK-RV32-STACK-OPT-NEXT: $x2 = frame-destroy ADDI killed $x2, 1744
+  ; CHECK-RV32-STACK-OPT-NEXT: $x1 = LW $x2, 252 :: (load (s32) from %stack.3)
+  ; CHECK-RV32-STACK-OPT-NEXT: $x2 = frame-destroy ADDI $x2, 256
+  ;
+  ; CHECK-RV64-NO-STACK-OPT-LABEL: name: _Z6callerv
+  ; CHECK-RV64-NO-STACK-OPT: $x2 = frame-setup ADDI $x2, -2032
+  ; CHECK-RV64-NO-STACK-OPT-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+  ; CHECK-RV64-NO-STACK-OPT-NEXT: SD killed $x1, $x2, 2024 :: (store (s64) into %stack.3)
+  ; CHECK-RV64-NO-STACK-OPT-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+  ; CHECK-RV64-NO-STACK-OPT-NEXT: $x2 = frame-setup ADDI $x2, -2000
+  ; CHECK-RV64-NO-STACK-OPT: $x2 = frame-destroy ADDI $x2, 2000
+  ; CHECK-RV64-NO-STACK-OPT-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.3)
+  ; CHECK-RV64-NO-STACK-OPT-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+  ;
+  ; CHECK-RV64-STACK-OPT-LABEL: name: _Z6callerv
+  ; CHECK-RV64-STACK-OPT: $x2 = frame-setup ADDI $x2, -512
+  ; CHECK-RV64-STACK-OPT-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 512
+  ; CHECK-RV64-STACK-OPT-NEXT: SD killed $x1, $x2, 504 :: (store (s64) into %stack.3)
+  ; CHECK-RV64-STACK-OPT-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+  ; CHECK-RV64-STACK-OPT-NEXT: $x2 = frame-setup ADDI $x2, -2048
+  ; CHECK-RV64-STACK-OPT-NEXT: $x2 = frame-setup ADDI killed $x2, -1472
+  ; CHECK-RV64-STACK-OPT: $x2 = frame-destroy ADDI $x2, 2032
+  ; CHECK-RV64-STACK-OPT-NEXT: $x2 = frame-destroy ADDI killed $x2, 1488
+  ; CHECK-RV64-STACK-OPT-NEXT: $x1 = LD $x2, 504 :: (load (s64) from %stack.3)
+  ; CHECK-RV64-STACK-OPT-NEXT: $x2 = frame-destroy ADDI $x2, 512
+  bb.0 (%ir-block.0):
+    renamable $x11 = ADDI %stack.1, 0
+    SD $x11, %stack.2, 0 :: (store (s64) into %stack.2)
+    renamable $x10 = COPY $x0
+    SW killed renamable $x10, renamable $x11, 0 :: (store (s32) into %ir.2)
+    PseudoBR %bb.1
+
+  bb.1 (%ir-block.3):
+    successors: %bb.2, %bb.4
+
+    $x10 = LD %stack.2, 0 :: (load (s64) from %stack.2)
+    renamable $x11 = LW renamable $x10, 0 :: (dereferenceable load (s32) from %ir.2)
+    renamable $x10 = ADDI $x0, 999
+    BLT killed renamable $x10, killed renamable $x11, %bb.4
+    PseudoBR %bb.2
+
+  bb.2 (%ir-block.6):
+    $x10 = LD %stack.2, 0 :: (load (s64) from %stack.2)
+    renamable $x10 = LW renamable $x10, 0 :: (dereferenceable load (s32) from %ir.2)
+    renamable $x12 = SLLI renamable $x10, 2
+    renamable $x11 = ADDI %stack.0, 0
+    renamable $x11 = ADD killed renamable $x11, killed renamable $x12
+    SW killed renamable $x10, killed renamable $x11, 0 :: (store (s32) into %ir.10)
+    PseudoBR %bb.3
+
+  bb.3 (%ir-block.11):
+    $x11 = LD %stack.2, 0 :: (load (s64) from %stack.2)
+    renamable $x10 = LW renamable $x11, 0 :: (dereferenceable load (s32) from %ir.2)
+    renamable $x10 = ADDIW killed renamable $x10, 1
+    SW killed renamable $x10, renamable $x11, 0 :: (store (s32) into %ir.2)
+    PseudoBR %bb.1
+
+  bb.4 (%ir-block.14):
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    renamable $x10 = ADDI %stack.0, 0
+    PseudoCALL target-flags(riscv-call) @_Z6calleePi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    PseudoRET
+
+...