diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1307,7 +1307,32 @@
     // instruction, and we have to stick with the stack alignment. 2048 has
     // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for
     // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment.
-    return 2048 - getStackAlign().value();
+    const uint64_t StackAlign = getStackAlign().value();
+    // When compressed instructions are available, prefer a smaller first SP
+    // adjustment so that the callee-saved spills and reloads can use the
+    // compressed SP-relative load/store forms.
+    if (STI.hasStdExtCOrZca()) {
+      // Reach of the compressed SP-relative loads/stores:
+      // riscv32: c.lwsp rd, offset[7:2] => 2^(6 + 2)
+      //          c.swsp rs2, offset[7:2] => 2^(6 + 2)
+      //          c.flwsp rd, offset[7:2] => 2^(6 + 2)
+      //          c.fswsp rs2, offset[7:2] => 2^(6 + 2)
+      // riscv64: c.ldsp rd, offset[8:3] => 2^(6 + 3)
+      //          c.sdsp rs2, offset[8:3] => 2^(6 + 3)
+      //          c.fldsp rd, offset[8:3] => 2^(6 + 3)
+      //          c.fsdsp rs2, offset[8:3] => 2^(6 + 3)
+      const uint64_t RVCompressLen = STI.getXLen() * 8;
+      // Only shrink the first adjustment when the second one does not become
+      // more expensive. ADDI takes a 12-bit signed immediate ([-2048, 2047]),
+      // so the remaining (StackSize - RVCompressLen) must not exceed 2047 or
+      // the epilogue's `sp += remainder` no longer fits a single ADDI.
+      // NOTE(review): CSI.size() looks like an entry count while RVCompressLen
+      // is a byte range; confirm this comparison is the intended one.
+      if ((CSI.size() <= RVCompressLen) && (StackSize <= RVCompressLen + 2047 ||
+                                            StackSize > 2048 * 3 - StackAlign))
+        return RVCompressLen;
+    }
+    return 2048 - StackAlign;
   }
   return 0;
 }
diff --git a/llvm/test/CodeGen/RISCV/stack-inst-compress.mir b/llvm/test/CodeGen/RISCV/stack-inst-compress.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-inst-compress.mir
@@ -0,0 +1,190 @@
+# RUN: llc -march=riscv32 -x mir -run-pass=prologepilog -verify-machineinstrs < %s \
+# RUN: | FileCheck -check-prefixes=CHECK-RV32-NO-COM %s
+# RUN: llc -march=riscv32 -mattr=+c -x mir -run-pass=prologepilog \
+# RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-RV32-COM %s
+# RUN: llc -march=riscv64 -x mir -run-pass=prologepilog -verify-machineinstrs < %s \
+# RUN: | FileCheck -check-prefixes=CHECK-RV64-NO-COM %s
+# RUN: llc -march=riscv64 -mattr=+c -x mir -run-pass=prologepilog \
+# RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-RV64-COM %s
+--- |
+  define dso_local void @_Z18caller_small_stackv() {
+  entry:
+    ret void
+  }
+
+  declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+
+  declare dso_local void @_Z6calleePi(ptr noundef)
+
+  define dso_local void @_Z19caller_larger_stackv() {
+  entry:
+    ret void
+  }
+
+...
+---
+name: _Z18caller_small_stackv
+alignment: 2
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 8
+  hasCalls: true
+  localFrameSize: 2068
+stack:
+  - { id: 0, size: 2068, alignment: 4, local-offset: -2068 }
+  - { id: 1, type: spill-slot, size: 8, alignment: 8 }
+machineFunctionInfo:
+  varArgsFrameIndex: 0
+  varArgsSaveSize: 0
+body: |
+  bb.0.entry:
+    ; CHECK-RV32-NO-COM-LABEL: name: _Z18caller_small_stackv
+    ; CHECK-RV32-NO-COM: $x2 = frame-setup ADDI $x2, -2032
+    ; CHECK-RV32-NO-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+    ; CHECK-RV32-NO-COM-NEXT: SW killed $x1, $x2, 2028 :: (store (s32) into %stack.2)
+    ; CHECK-RV32-NO-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-setup ADDI $x2, -64
+
+    ; CHECK-RV32-NO-COM: $x2 = frame-destroy ADDI $x2, 64
+    ; CHECK-RV32-NO-COM-NEXT: $x1 = LW $x2, 2028 :: (load (s32) from %stack.2)
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+    ; RV32 with +c (see RUN lines): the first SP adjustment is 256 rather than 2032, $x1 is saved/restored at offset 252, and the second adjustment is 1840.
+    ; CHECK-RV32-COM-LABEL: name: _Z18caller_small_stackv
+    ; CHECK-RV32-COM: $x2 = frame-setup ADDI $x2, -256
+    ; CHECK-RV32-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 256
+    ; CHECK-RV32-COM-NEXT: SW killed $x1, $x2, 252 :: (store (s32) into %stack.2)
+    ; CHECK-RV32-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-setup ADDI $x2, -1840
+
+    ; CHECK-RV32-COM: $x2 = frame-destroy ADDI $x2, 1840
+    ; CHECK-RV32-COM-NEXT: $x1 = LW $x2, 252 :: (load (s32) from %stack.2)
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-destroy ADDI $x2, 256
+    ; RV64 without +c: the first SP adjustment is 2032 with $x1 saved/restored by SD/LD at offset 2024, followed by a second adjustment of 64.
+    ; CHECK-RV64-NO-COM-LABEL: name: _Z18caller_small_stackv
+    ; CHECK-RV64-NO-COM: $x2 = frame-setup ADDI $x2, -2032
+    ; CHECK-RV64-NO-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+    ; CHECK-RV64-NO-COM-NEXT: SD killed $x1, $x2, 2024 :: (store (s64) into %stack.2)
+    ; CHECK-RV64-NO-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-setup ADDI $x2, -64
+
+    ; CHECK-RV64-NO-COM: $x2 = frame-destroy ADDI $x2, 64
+    ; CHECK-RV64-NO-COM-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.2)
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+    ; RV64 with +c: the first SP adjustment is 512 rather than 2032, $x1 is saved/restored at offset 504, and the second adjustment is 1584.
+    ; CHECK-RV64-COM-LABEL: name: _Z18caller_small_stackv
+    ; CHECK-RV64-COM: $x2 = frame-setup ADDI $x2, -512
+    ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 512
+    ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 504 :: (store (s64) into %stack.2)
+    ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -1584
+
+    ; CHECK-RV64-COM: $x2 = frame-destroy ADDI $x2, 1584
+    ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 504 :: (load (s64) from %stack.2)
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 512
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    renamable $x10 = LUI 1
+    renamable $x12 = ADDIW killed renamable $x10, -2028
+    renamable $x10 = ADDI %stack.0, 0
+    SD $x10, %stack.1, 0 :: (store (s64) into %stack.1)
+    renamable $x11 = COPY $x0
+    PseudoCALL target-flags(riscv-plt) &memset, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit killed $x11, implicit killed $x12, implicit-def $x2, implicit-def $x10
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    dead renamable $x11 = COPY $x10
+    $x10 = LD %stack.1, 0 :: (load (s64) from %stack.1)
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    PseudoCALL target-flags(riscv-call) @_Z6calleePi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    PseudoRET
+
+...
+---
+name: _Z19caller_larger_stackv
+alignment: 2
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 8
+  hasCalls: true
+  localFrameSize: 6144
+stack:
+  - { id: 0, size: 6144, alignment: 4, local-offset: -6144 }
+  - { id: 1, type: spill-slot, size: 8, alignment: 8 }
+machineFunctionInfo:
+  varArgsFrameIndex: 0
+  varArgsSaveSize: 0
+body: |
+  bb.0.entry:
+    ; CHECK-RV32-NO-COM-LABEL: name: _Z19caller_larger_stackv
+    ; CHECK-RV32-NO-COM: $x2 = frame-setup ADDI $x2, -2032
+    ; CHECK-RV32-NO-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+    ; CHECK-RV32-NO-COM-NEXT: SW killed $x1, $x2, 2028 :: (store (s32) into %stack.2)
+    ; CHECK-RV32-NO-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+    ; CHECK-RV32-NO-COM-NEXT: $x10 = frame-setup LUI 1
+    ; CHECK-RV32-NO-COM-NEXT: $x10 = frame-setup ADDI killed $x10, 48
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-setup SUB $x2, killed $x10
+
+    ; CHECK-RV32-NO-COM: $x10 = frame-destroy LUI 1
+    ; CHECK-RV32-NO-COM-NEXT: $x10 = frame-destroy ADDI killed $x10, 48
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
+    ; CHECK-RV32-NO-COM-NEXT: $x1 = LW $x2, 2028 :: (load (s32) from %stack.2)
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+    ; RV32 with +c (see RUN lines): the first SP adjustment is 256 with $x1 at offset 252; the remainder is still built with LUI/ADDI and applied with SUB/ADD.
+    ; CHECK-RV32-COM-LABEL: name: _Z19caller_larger_stackv
+    ; CHECK-RV32-COM: $x2 = frame-setup ADDI $x2, -256
+    ; CHECK-RV32-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 256
+    ; CHECK-RV32-COM-NEXT: SW killed $x1, $x2, 252 :: (store (s32) into %stack.2)
+    ; CHECK-RV32-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+    ; CHECK-RV32-COM-NEXT: $x10 = frame-setup LUI 1
+    ; CHECK-RV32-COM-NEXT: $x10 = frame-setup ADDI killed $x10, 1824
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-setup SUB $x2, killed $x10
+
+    ; CHECK-RV32-COM: $x10 = frame-destroy LUI 1
+    ; CHECK-RV32-COM-NEXT: $x10 = frame-destroy ADDI killed $x10, 1824
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
+    ; CHECK-RV32-COM-NEXT: $x1 = LW $x2, 252 :: (load (s32) from %stack.2)
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-destroy ADDI $x2, 256
+    ; RV64 without +c: the first SP adjustment is 2032 with $x1 at offset 2024; the remainder is built with LUI/ADDIW and applied with SUB/ADD.
+    ; CHECK-RV64-NO-COM-LABEL: name: _Z19caller_larger_stackv
+    ; CHECK-RV64-NO-COM: $x2 = frame-setup ADDI $x2, -2032
+    ; CHECK-RV64-NO-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+    ; CHECK-RV64-NO-COM-NEXT: SD killed $x1, $x2, 2024 :: (store (s64) into %stack.2)
+    ; CHECK-RV64-NO-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+    ; CHECK-RV64-NO-COM-NEXT: $x10 = frame-setup LUI 1
+    ; CHECK-RV64-NO-COM-NEXT: $x10 = frame-setup ADDIW killed $x10, 48
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-setup SUB $x2, killed $x10
+
+    ; CHECK-RV64-NO-COM: $x10 = frame-destroy LUI 1
+    ; CHECK-RV64-NO-COM-NEXT: $x10 = frame-destroy ADDIW killed $x10, 48
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
+    ; CHECK-RV64-NO-COM-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.2)
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+    ; RV64 with +c: the first SP adjustment is 512 with $x1 at offset 504; the remainder is still built with LUI/ADDIW and applied with SUB/ADD.
+    ; CHECK-RV64-COM-LABEL: name: _Z19caller_larger_stackv
+    ; CHECK-RV64-COM: $x2 = frame-setup ADDI $x2, -512
+    ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 512
+    ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 504 :: (store (s64) into %stack.2)
+    ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+    ; CHECK-RV64-COM-NEXT: $x10 = frame-setup LUI 1
+    ; CHECK-RV64-COM-NEXT: $x10 = frame-setup ADDIW killed $x10, 1568
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-setup SUB $x2, killed $x10
+
+    ; CHECK-RV64-COM: $x10 = frame-destroy LUI 1
+    ; CHECK-RV64-COM-NEXT: $x10 = frame-destroy ADDIW killed $x10, 1568
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
+    ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 504 :: (load (s64) from %stack.2)
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 512
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    renamable $x10 = ADDI $x0, 3
+    renamable $x12 = SLLI killed renamable $x10, 11
+    renamable $x10 = ADDI %stack.0, 0
+    SD $x10, %stack.1, 0 :: (store (s64) into %stack.1)
+    renamable $x11 = COPY $x0
+    PseudoCALL target-flags(riscv-plt) &memset, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit killed $x11, implicit killed $x12, implicit-def $x2, implicit-def $x10
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    dead renamable $x11 = COPY $x10
+    $x10 = LD %stack.1, 0 :: (load (s64) from %stack.1)
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    PseudoCALL target-flags(riscv-call) @_Z6calleePi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    PseudoRET
+
+...