diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1307,7 +1307,34 @@
     // instruction, and we have to stick with the stack alignment. 2048 has
     // 16-byte alignment. The stack alignment for RV32 and RV64 is 16 and for
     // RV32E it is 4. So (2048 - StackAlign) will satisfy the stack alignment.
-    return 2048 - getStackAlign().value();
+    const uint64_t StackAlign = getStackAlign().value();
+    uint64_t FirstSPAmount = 2048 - StackAlign;
+    // An offset of (2048 - StackAlign) is out of range for the compressed
+    // SP-relative loads/stores, so with the C extension prefer a first
+    // adjustment that keeps the callee-saved spills/reloads compressible.
+    if (STI.hasStdExtC()) {
+      // riscv32: c.lwsp rd, offset[7:2] / c.swsp rs2, offset[7:2] => 2^(6+2)
+      const uint64_t RV32CompressLen = 256;
+      // riscv64: c.ldsp rd, offset[8:3] / c.sdsp rs2, offset[8:3] => 2^(6+3)
+      const uint64_t RV64CompressLen = 512;
+      // Shrink the first adjustment only when that does not lengthen the
+      // second one:
+      //  * StackSize <= CompressLen + 2047: the remainder is still a single
+      //    ADDI. The bound is 2047 rather than 2048 because the epilogue
+      //    adds the remainder back, and 2047 is the largest positive ADDI
+      //    immediate.
+      //  * StackSize > 2048 * 3 - StackAlign: presumably the remainder is
+      //    too large for short ADDI sequences whichever amount is used.
+      auto CanCompress = [&](uint64_t CompressLen) {
+        return StackSize <= CompressLen + 2047 ||
+               StackSize > 2048 * 3 - StackAlign;
+      };
+      if (STI.getXLen() == 32 && CanCompress(RV32CompressLen))
+        FirstSPAmount = RV32CompressLen;
+      else if (STI.getXLen() == 64 && CanCompress(RV64CompressLen))
+        FirstSPAmount = RV64CompressLen;
+    }
+    return FirstSPAmount;
   }
   return 0;
 }
diff --git a/llvm/test/CodeGen/RISCV/stack-inst-compress.mir b/llvm/test/CodeGen/RISCV/stack-inst-compress.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-inst-compress.mir
@@ -0,0 +1,204 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -march=riscv32 -x mir -run-pass=prologepilog -verify-machineinstrs < %s \
+# RUN:   | FileCheck -check-prefixes=CHECK-RV32-NO-COM %s
+# RUN: llc -march=riscv32 -mattr=+c -x mir -run-pass=prologepilog \
+# RUN:   -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-RV32-COM %s
+# RUN: llc -march=riscv64 -x mir -run-pass=prologepilog -verify-machineinstrs < %s \
+# RUN:   | FileCheck -check-prefixes=CHECK-RV64-NO-COM %s
+# RUN: llc -march=riscv64 -mattr=+c -x mir -run-pass=prologepilog \
+# RUN:   -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-RV64-COM %s
+--- |
+  define dso_local void @_Z18caller_small_stackv() {
+  entry:
+    %arr = alloca [517 x i32], align 4
+    call void @llvm.memset.p0.i64(ptr align 4 %arr, i8 0, i64 2068, i1 false)
+    %arraydecay = getelementptr inbounds [517 x i32], ptr %arr, i64 0, i64 0
+    call void @_Z6calleePi(ptr noundef %arraydecay)
+    ret void
+  }
+
+  declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
+
+  declare dso_local void @_Z6calleePi(ptr noundef)
+
+  define dso_local void @_Z19caller_larger_stackv() {
+  entry:
+    %arr = alloca [1536 x i32], align 4
+    call void @llvm.memset.p0.i64(ptr align 4 %arr, i8 0, i64 6144, i1 false)
+    %arraydecay = getelementptr inbounds [1536 x i32], ptr %arr, i64 0, i64 0
+    call void @_Z6calleePi(ptr noundef %arraydecay)
+    ret void
+  }
+
+...
+---
+name: _Z18caller_small_stackv
+alignment: 2
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 8
+  hasCalls: true
+  localFrameSize: 2068
+stack:
+  - { id: 0, name: arr, size: 2068, alignment: 4, local-offset: -2068 }
+  - { id: 1, type: spill-slot, size: 8, alignment: 8 }
+machineFunctionInfo:
+  varArgsFrameIndex: 0
+  varArgsSaveSize: 0
+body: |
+  bb.0.entry:
+    ; CHECK-RV32-NO-COM-LABEL: name: _Z18caller_small_stackv
+    ; CHECK-RV32-NO-COM: $x2 = frame-setup ADDI $x2, -2032
+    ; CHECK-RV32-NO-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+    ; CHECK-RV32-NO-COM-NEXT: SW killed $x1, $x2, 2028 :: (store (s32) into %stack.2)
+    ; CHECK-RV32-NO-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-setup ADDI $x2, -64
+
+    ; CHECK-RV32-NO-COM: $x2 = frame-destroy ADDI $x2, 64
+    ; CHECK-RV32-NO-COM-NEXT: $x1 = LW $x2, 2028 :: (load (s32) from %stack.2)
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+    ;
+    ; CHECK-RV32-COM-LABEL: name: _Z18caller_small_stackv
+    ; CHECK-RV32-COM: $x2 = frame-setup ADDI $x2, -256
+    ; CHECK-RV32-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 256
+    ; CHECK-RV32-COM-NEXT: SW killed $x1, $x2, 252 :: (store (s32) into %stack.2)
+    ; CHECK-RV32-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-setup ADDI $x2, -1840
+
+    ; CHECK-RV32-COM: $x2 = frame-destroy ADDI $x2, 1840
+    ; CHECK-RV32-COM-NEXT: $x1 = LW $x2, 252 :: (load (s32) from %stack.2)
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-destroy ADDI $x2, 256
+    ;
+    ; CHECK-RV64-NO-COM-LABEL: name: _Z18caller_small_stackv
+    ; CHECK-RV64-NO-COM: $x2 = frame-setup ADDI $x2, -2032
+    ; CHECK-RV64-NO-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+    ; CHECK-RV64-NO-COM-NEXT: SD killed $x1, $x2, 2024 :: (store (s64) into %stack.2)
+    ; CHECK-RV64-NO-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-setup ADDI $x2, -64
+
+    ; CHECK-RV64-NO-COM: $x2 = frame-destroy ADDI $x2, 64
+    ; CHECK-RV64-NO-COM-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.2)
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+    ;
+    ; CHECK-RV64-COM-LABEL: name: _Z18caller_small_stackv
+    ; CHECK-RV64-COM: $x2 = frame-setup ADDI $x2, -512
+    ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 512
+    ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 504 :: (store (s64) into %stack.2)
+    ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-setup ADDI $x2, -1584
+
+    ; CHECK-RV64-COM: $x2 = frame-destroy ADDI $x2, 1584
+    ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 504 :: (load (s64) from %stack.2)
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 512
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    renamable $x10 = LUI 1
+    renamable $x12 = ADDIW killed renamable $x10, -2028
+    renamable $x10 = ADDI %stack.0.arr, 0
+    SD $x10, %stack.1, 0 :: (store (s64) into %stack.1)
+    renamable $x11 = COPY $x0
+    PseudoCALL target-flags(riscv-plt) &memset, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit killed $x11, implicit killed $x12, implicit-def $x2, implicit-def $x10
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    dead renamable $x11 = COPY $x10
+    $x10 = LD %stack.1, 0 :: (load (s64) from %stack.1)
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    PseudoCALL target-flags(riscv-call) @_Z6calleePi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    PseudoRET
+
+...
+---
+name: _Z19caller_larger_stackv
+alignment: 2
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 8
+  hasCalls: true
+  localFrameSize: 6144
+stack:
+  - { id: 0, name: arr, size: 6144, alignment: 4, local-offset: -6144 }
+  - { id: 1, type: spill-slot, size: 8, alignment: 8 }
+machineFunctionInfo:
+  varArgsFrameIndex: 0
+  varArgsSaveSize: 0
+body: |
+  bb.0.entry:
+    ; CHECK-RV32-NO-COM-LABEL: name: _Z19caller_larger_stackv
+    ; CHECK-RV32-NO-COM: $x2 = frame-setup ADDI $x2, -2032
+    ; CHECK-RV32-NO-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+    ; CHECK-RV32-NO-COM-NEXT: SW killed $x1, $x2, 2028 :: (store (s32) into %stack.2)
+    ; CHECK-RV32-NO-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+    ; CHECK-RV32-NO-COM-NEXT: $x10 = frame-setup LUI 1
+    ; CHECK-RV32-NO-COM-NEXT: $x10 = frame-setup ADDI killed $x10, 48
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-setup SUB $x2, killed $x10
+
+    ; CHECK-RV32-NO-COM: $x10 = frame-destroy LUI 1
+    ; CHECK-RV32-NO-COM-NEXT: $x10 = frame-destroy ADDI killed $x10, 48
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
+    ; CHECK-RV32-NO-COM-NEXT: $x1 = LW $x2, 2028 :: (load (s32) from %stack.2)
+    ; CHECK-RV32-NO-COM-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+    ;
+    ; CHECK-RV32-COM-LABEL: name: _Z19caller_larger_stackv
+    ; CHECK-RV32-COM: $x2 = frame-setup ADDI $x2, -256
+    ; CHECK-RV32-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 256
+    ; CHECK-RV32-COM-NEXT: SW killed $x1, $x2, 252 :: (store (s32) into %stack.2)
+    ; CHECK-RV32-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -4
+    ; CHECK-RV32-COM-NEXT: $x10 = frame-setup LUI 1
+    ; CHECK-RV32-COM-NEXT: $x10 = frame-setup ADDI killed $x10, 1824
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-setup SUB $x2, killed $x10
+
+    ; CHECK-RV32-COM: $x10 = frame-destroy LUI 1
+    ; CHECK-RV32-COM-NEXT: $x10 = frame-destroy ADDI killed $x10, 1824
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
+    ; CHECK-RV32-COM-NEXT: $x1 = LW $x2, 252 :: (load (s32) from %stack.2)
+    ; CHECK-RV32-COM-NEXT: $x2 = frame-destroy ADDI $x2, 256
+    ;
+    ; CHECK-RV64-NO-COM-LABEL: name: _Z19caller_larger_stackv
+    ; CHECK-RV64-NO-COM: $x2 = frame-setup ADDI $x2, -2032
+    ; CHECK-RV64-NO-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2032
+    ; CHECK-RV64-NO-COM-NEXT: SD killed $x1, $x2, 2024 :: (store (s64) into %stack.2)
+    ; CHECK-RV64-NO-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+    ; CHECK-RV64-NO-COM-NEXT: $x10 = frame-setup LUI 1
+    ; CHECK-RV64-NO-COM-NEXT: $x10 = frame-setup ADDIW killed $x10, 48
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-setup SUB $x2, killed $x10
+
+    ; CHECK-RV64-NO-COM: $x10 = frame-destroy LUI 1
+    ; CHECK-RV64-NO-COM-NEXT: $x10 = frame-destroy ADDIW killed $x10, 48
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
+    ; CHECK-RV64-NO-COM-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.2)
+    ; CHECK-RV64-NO-COM-NEXT: $x2 = frame-destroy ADDI $x2, 2032
+    ;
+    ; CHECK-RV64-COM-LABEL: name: _Z19caller_larger_stackv
+    ; CHECK-RV64-COM: $x2 = frame-setup ADDI $x2, -512
+    ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 512
+    ; CHECK-RV64-COM-NEXT: SD killed $x1, $x2, 504 :: (store (s64) into %stack.2)
+    ; CHECK-RV64-COM-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8
+    ; CHECK-RV64-COM-NEXT: $x10 = frame-setup LUI 1
+    ; CHECK-RV64-COM-NEXT: $x10 = frame-setup ADDIW killed $x10, 1568
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-setup SUB $x2, killed $x10
+
+    ; CHECK-RV64-COM: $x10 = frame-destroy LUI 1
+    ; CHECK-RV64-COM-NEXT: $x10 = frame-destroy ADDIW killed $x10, 1568
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
+    ; CHECK-RV64-COM-NEXT: $x1 = LD $x2, 504 :: (load (s64) from %stack.2)
+    ; CHECK-RV64-COM-NEXT: $x2 = frame-destroy ADDI $x2, 512
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    renamable $x10 = ADDI $x0, 3
+    renamable $x12 = SLLI killed renamable $x10, 11
+    renamable $x10 = ADDI %stack.0.arr, 0
+    SD $x10, %stack.1, 0 :: (store (s64) into %stack.1)
+    renamable $x11 = COPY $x0
+    PseudoCALL target-flags(riscv-plt) &memset, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit killed $x11, implicit killed $x12, implicit-def $x2, implicit-def $x10
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    dead renamable $x11 = COPY $x10
+    $x10 = LD %stack.1, 0 :: (load (s64) from %stack.1)
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+    PseudoCALL target-flags(riscv-call) @_Z6calleePi, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit-def $x2
+    ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+    PseudoRET
+
+...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# CHECK-RV32-COM: {{.*}}
+# CHECK-RV32-NO-COM: {{.*}}
+# CHECK-RV64-COM: {{.*}}
+# CHECK-RV64-NO-COM: {{.*}}