diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -6548,11 +6548,14 @@ // For now, only handle fully promoted and indirect arguments. if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. - SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); - int FI = cast(SpillSlot)->getIndex(); - MemOpChains.push_back( - DAG.getStore(Chain, DL, ArgValue, SpillSlot, - MachinePointerInfo::getFixedStack(MF, FI))); + auto &TDL = DAG.getDataLayout(); + Align StackAlign = TDL.getPrefTypeAlign( + ArgValue.getValueType().getTypeForEVT(*DAG.getContext())); + StackAlign = + std::max(TDL.getPrefTypeAlign(ArgValue.getValueType().getTypeForEVT( + *DAG.getContext())), + StackAlign); + TypeSize StoredSize = ArgValue.getValueType().getStoreSize(); // If the original argument was split (e.g. i128), we need // to store the required parts of it here (and pass just one address). // Vectors may be partly split to registers and partly to the stack, in @@ -6561,15 +6564,34 @@ unsigned ArgIndex = Outs[i].OrigArgIndex; unsigned ArgPartOffset = Outs[i].PartOffset; assert(VA.getValVT().isVector() || ArgPartOffset == 0); + // Calculate the total size to store. We don't have access to what we're + // actually storing other than performing the loop and collecting the + // info. + SmallVector> Parts; while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { SDValue PartValue = OutVals[i + 1]; unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset; + EVT PartVT = PartValue.getValueType(); + StoredSize += PartVT.getStoreSize(); + StackAlign = std::max( + TDL.getPrefTypeAlign(PartVT.getTypeForEVT(*DAG.getContext())), + StackAlign); + Parts.push_back(std::make_pair(PartValue, PartOffset)); + ++i; + } + SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign); + int FI = cast(SpillSlot)->getIndex(); + MemOpChains.push_back( + DAG.getStore(Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(MF, FI))); + for (const auto &Part : Parts) { + SDValue PartValue = Part.first; + unsigned PartOffset = Part.second; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, DAG.getIntPtrConstant(PartOffset, DL)); MemOpChains.push_back( DAG.getStore(Chain, DL, PartValue, Address, MachinePointerInfo::getFixedStack(MF, FI))); - ++i; } ArgValue = SpillSlot; } else { diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll @@ -143,8 +143,8 @@ ; should only be 4-byte aligned ; RV32I-FPELIM-LABEL: caller_aligned_stack: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: addi sp, sp, -64 -; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-FPELIM-NEXT: addi sp, sp, -48 +; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: addi a0, zero, 18 ; RV32I-FPELIM-NEXT: sw a0, 24(sp) ; RV32I-FPELIM-NEXT: addi a0, zero, 17 @@ -161,28 +161,28 @@ ; RV32I-FPELIM-NEXT: sw a0, 0(sp) ; RV32I-FPELIM-NEXT: lui a0, 262153 ; RV32I-FPELIM-NEXT: addi a0, a0, 491 -; RV32I-FPELIM-NEXT: sw a0, 44(sp) +; RV32I-FPELIM-NEXT: sw a0, 40(sp) ; RV32I-FPELIM-NEXT: lui a0, 545260 ; RV32I-FPELIM-NEXT: addi a0, a0, -1967 -; RV32I-FPELIM-NEXT: sw a0, 40(sp) +; RV32I-FPELIM-NEXT: sw a0, 36(sp) ; RV32I-FPELIM-NEXT: lui a0, 964690 ; RV32I-FPELIM-NEXT: addi a0, a0, -328 -; RV32I-FPELIM-NEXT: sw a0, 36(sp) +; RV32I-FPELIM-NEXT: sw a0, 32(sp) ; RV32I-FPELIM-NEXT: lui a0, 335544 ; RV32I-FPELIM-NEXT: addi t0, a0, 1311 ; RV32I-FPELIM-NEXT: lui a0, 688509 ; RV32I-FPELIM-NEXT: addi a5, a0, -2048 ; RV32I-FPELIM-NEXT: addi a0, zero, 1 ; RV32I-FPELIM-NEXT: addi a1, zero, 11 -; RV32I-FPELIM-NEXT: addi a2, sp, 32 +; RV32I-FPELIM-NEXT: addi a2, sp, 28 ; RV32I-FPELIM-NEXT: addi a3, zero, 12 ; RV32I-FPELIM-NEXT: addi a4, zero, 13 ; RV32I-FPELIM-NEXT: addi a6, zero, 4 ; RV32I-FPELIM-NEXT: addi a7, zero, 14 -; RV32I-FPELIM-NEXT: sw t0, 32(sp) +; RV32I-FPELIM-NEXT: sw t0, 28(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack@plt -; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-FPELIM-NEXT: addi sp, sp, 64 +; RV32I-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-FPELIM-NEXT: addi sp, sp, 48 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: caller_aligned_stack: @@ -207,25 +207,25 @@ ; RV32I-WITHFP-NEXT: sw a0, 0(sp) ; RV32I-WITHFP-NEXT: lui a0, 262153 ; RV32I-WITHFP-NEXT: addi a0, a0, 491 -; RV32I-WITHFP-NEXT: sw a0, -20(s0) +; RV32I-WITHFP-NEXT: sw a0, -12(s0) ; RV32I-WITHFP-NEXT: lui a0, 545260 ; RV32I-WITHFP-NEXT: addi a0, a0, -1967 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: sw a0, -16(s0) ; RV32I-WITHFP-NEXT: lui a0, 964690 ; RV32I-WITHFP-NEXT: addi a0, a0, -328 -; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: sw a0, -20(s0) ; RV32I-WITHFP-NEXT: lui a0, 335544 ; RV32I-WITHFP-NEXT: addi t0, a0, 1311 ; RV32I-WITHFP-NEXT: lui a0, 688509 ; RV32I-WITHFP-NEXT: addi a5, a0, -2048 ; RV32I-WITHFP-NEXT: addi a0, zero, 1 ; RV32I-WITHFP-NEXT: addi a1, zero, 11 -; RV32I-WITHFP-NEXT: addi a2, s0, -32 +; RV32I-WITHFP-NEXT: addi a2, s0, -24 ; RV32I-WITHFP-NEXT: addi a3, zero, 12 ; RV32I-WITHFP-NEXT: addi a4, zero, 13 ; RV32I-WITHFP-NEXT: addi a6, zero, 4 ; RV32I-WITHFP-NEXT: addi a7, zero, 14 -; RV32I-WITHFP-NEXT: sw t0, -32(s0) +; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack@plt ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -248,17 +248,17 @@ ; RV32I-FPELIM-NEXT: addi sp, sp, -48 ; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: lui a0, 524272 -; RV32I-FPELIM-NEXT: sw a0, 12(sp) -; RV32I-FPELIM-NEXT: sw zero, 8(sp) -; RV32I-FPELIM-NEXT: sw zero, 4(sp) -; RV32I-FPELIM-NEXT: sw zero, 0(sp) +; RV32I-FPELIM-NEXT: sw a0, 24(sp) +; RV32I-FPELIM-NEXT: sw zero, 20(sp) +; RV32I-FPELIM-NEXT: sw zero, 16(sp) +; RV32I-FPELIM-NEXT: sw zero, 12(sp) +; RV32I-FPELIM-NEXT: sw zero, 40(sp) ; RV32I-FPELIM-NEXT: sw zero, 36(sp) ; RV32I-FPELIM-NEXT: sw zero, 32(sp) -; RV32I-FPELIM-NEXT: sw zero, 28(sp) ; RV32I-FPELIM-NEXT: addi a2, zero, 1 -; RV32I-FPELIM-NEXT: addi a0, sp, 24 -; RV32I-FPELIM-NEXT: mv a1, sp -; RV32I-FPELIM-NEXT: sw a2, 24(sp) +; RV32I-FPELIM-NEXT: addi a0, sp, 28 +; RV32I-FPELIM-NEXT: addi a1, sp, 12 +; RV32I-FPELIM-NEXT: sw a2, 28(sp) ; RV32I-FPELIM-NEXT: call callee_large_scalars@plt ; RV32I-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 48 @@ -271,16 +271,16 @@ ; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 48 ; RV32I-WITHFP-NEXT: lui a0, 524272 -; RV32I-WITHFP-NEXT: sw a0, -36(s0) +; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: sw zero, -32(s0) +; RV32I-WITHFP-NEXT: sw zero, -36(s0) ; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw zero, -44(s0) -; RV32I-WITHFP-NEXT: sw zero, -48(s0) ; RV32I-WITHFP-NEXT: sw zero, -12(s0) ; RV32I-WITHFP-NEXT: sw zero, -16(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) ; RV32I-WITHFP-NEXT: addi a2, zero, 1 ; RV32I-WITHFP-NEXT: addi a0, s0, -24 -; RV32I-WITHFP-NEXT: addi a1, s0, -48 +; RV32I-WITHFP-NEXT: addi a1, s0, -40 ; RV32I-WITHFP-NEXT: sw a2, -24(s0) ; RV32I-WITHFP-NEXT: call callee_large_scalars@plt ; RV32I-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload @@ -353,20 +353,20 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-FPELIM-LABEL: caller_large_scalars_exhausted_regs: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: addi sp, sp, -64 -; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: addi a0, sp, 16 +; RV32I-FPELIM-NEXT: addi sp, sp, -48 +; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-FPELIM-NEXT: addi a0, sp, 12 ; RV32I-FPELIM-NEXT: sw a0, 4(sp) ; RV32I-FPELIM-NEXT: addi a0, zero, 9 ; RV32I-FPELIM-NEXT: sw a0, 0(sp) ; RV32I-FPELIM-NEXT: lui a0, 524272 -; RV32I-FPELIM-NEXT: sw a0, 28(sp) -; RV32I-FPELIM-NEXT: sw zero, 24(sp) +; RV32I-FPELIM-NEXT: sw a0, 24(sp) ; RV32I-FPELIM-NEXT: sw zero, 20(sp) ; RV32I-FPELIM-NEXT: sw zero, 16(sp) -; RV32I-FPELIM-NEXT: sw zero, 52(sp) -; RV32I-FPELIM-NEXT: sw zero, 48(sp) -; RV32I-FPELIM-NEXT: sw zero, 44(sp) +; RV32I-FPELIM-NEXT: sw zero, 12(sp) +; RV32I-FPELIM-NEXT: sw zero, 40(sp) +; RV32I-FPELIM-NEXT: sw zero, 36(sp) +; RV32I-FPELIM-NEXT: sw zero, 32(sp) ; RV32I-FPELIM-NEXT: addi t0, zero, 8 ; RV32I-FPELIM-NEXT: addi a0, zero, 1 ; RV32I-FPELIM-NEXT: addi a1, zero, 2 @@ -375,28 +375,28 @@ ; RV32I-FPELIM-NEXT: addi a4, zero, 5 ; RV32I-FPELIM-NEXT: addi a5, zero, 6 ; RV32I-FPELIM-NEXT: addi a6, zero, 7 -; RV32I-FPELIM-NEXT: addi a7, sp, 40 -; RV32I-FPELIM-NEXT: sw t0, 40(sp) +; RV32I-FPELIM-NEXT: addi a7, sp, 28 +; RV32I-FPELIM-NEXT: sw t0, 28(sp) ; RV32I-FPELIM-NEXT: call callee_large_scalars_exhausted_regs@plt -; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-FPELIM-NEXT: addi sp, sp, 64 +; RV32I-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-FPELIM-NEXT: addi sp, sp, 48 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: caller_large_scalars_exhausted_regs: ; RV32I-WITHFP: # %bb.0: -; RV32I-WITHFP-NEXT: addi sp, sp, -64 -; RV32I-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32I-WITHFP-NEXT: addi s0, sp, 64 -; RV32I-WITHFP-NEXT: addi a0, s0, -48 +; RV32I-WITHFP-NEXT: addi sp, sp, -48 +; RV32I-WITHFP-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-WITHFP-NEXT: addi s0, sp, 48 +; RV32I-WITHFP-NEXT: addi a0, s0, -40 ; RV32I-WITHFP-NEXT: sw a0, 4(sp) ; RV32I-WITHFP-NEXT: addi a0, zero, 9 ; RV32I-WITHFP-NEXT: sw a0, 0(sp) ; RV32I-WITHFP-NEXT: lui a0, 524272 -; RV32I-WITHFP-NEXT: sw a0, -36(s0) +; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: sw zero, -32(s0) +; RV32I-WITHFP-NEXT: sw zero, -36(s0) ; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw zero, -44(s0) -; RV32I-WITHFP-NEXT: sw zero, -48(s0) ; RV32I-WITHFP-NEXT: sw zero, -12(s0) ; RV32I-WITHFP-NEXT: sw zero, -16(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) @@ -411,9 +411,9 @@ ; RV32I-WITHFP-NEXT: addi a7, s0, -24 ; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: call callee_large_scalars_exhausted_regs@plt -; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-WITHFP-NEXT: addi sp, sp, 64 +; RV32I-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-WITHFP-NEXT: addi sp, sp, 48 ; RV32I-WITHFP-NEXT: ret %1 = call i32 @callee_large_scalars_exhausted_regs( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, @@ -667,8 +667,8 @@ ; should only be 4-byte aligned ; RV32I-FPELIM-LABEL: caller_aligned_stack: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: addi sp, sp, -64 -; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32I-FPELIM-NEXT: addi sp, sp, -48 +; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: addi a0, zero, 19 ; RV32I-FPELIM-NEXT: sw a0, 24(sp) ; RV32I-FPELIM-NEXT: addi a0, zero, 18 @@ -682,28 +682,28 @@ ; RV32I-FPELIM-NEXT: sw a0, 0(sp) ; RV32I-FPELIM-NEXT: lui a0, 262153 ; RV32I-FPELIM-NEXT: addi a0, a0, 491 -; RV32I-FPELIM-NEXT: sw a0, 44(sp) +; RV32I-FPELIM-NEXT: sw a0, 40(sp) ; RV32I-FPELIM-NEXT: lui a0, 545260 ; RV32I-FPELIM-NEXT: addi a0, a0, -1967 -; RV32I-FPELIM-NEXT: sw a0, 40(sp) +; RV32I-FPELIM-NEXT: sw a0, 36(sp) ; RV32I-FPELIM-NEXT: lui a0, 964690 ; RV32I-FPELIM-NEXT: addi a0, a0, -328 -; RV32I-FPELIM-NEXT: sw a0, 36(sp) +; RV32I-FPELIM-NEXT: sw a0, 32(sp) ; RV32I-FPELIM-NEXT: lui a0, 335544 ; RV32I-FPELIM-NEXT: addi t0, a0, 1311 ; RV32I-FPELIM-NEXT: lui a0, 688509 ; RV32I-FPELIM-NEXT: addi a5, a0, -2048 ; RV32I-FPELIM-NEXT: addi a0, zero, 1 ; RV32I-FPELIM-NEXT: addi a1, zero, 11 -; RV32I-FPELIM-NEXT: addi a2, sp, 32 +; RV32I-FPELIM-NEXT: addi a2, sp, 28 ; RV32I-FPELIM-NEXT: addi a3, zero, 12 ; RV32I-FPELIM-NEXT: addi a4, zero, 13 ; RV32I-FPELIM-NEXT: addi a6, zero, 4 ; RV32I-FPELIM-NEXT: addi a7, zero, 14 -; RV32I-FPELIM-NEXT: sw t0, 32(sp) +; RV32I-FPELIM-NEXT: sw t0, 28(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack@plt -; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-FPELIM-NEXT: addi sp, sp, 64 +; RV32I-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-FPELIM-NEXT: addi sp, sp, 48 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: caller_aligned_stack: @@ -725,25 +725,25 @@ ; RV32I-WITHFP-NEXT: sw a0, 0(sp) ; RV32I-WITHFP-NEXT: lui a0, 262153 ; RV32I-WITHFP-NEXT: addi a0, a0, 491 -; RV32I-WITHFP-NEXT: sw a0, -20(s0) +; RV32I-WITHFP-NEXT: sw a0, -12(s0) ; RV32I-WITHFP-NEXT: lui a0, 545260 ; RV32I-WITHFP-NEXT: addi a0, a0, -1967 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: sw a0, -16(s0) ; RV32I-WITHFP-NEXT: lui a0, 964690 ; RV32I-WITHFP-NEXT: addi a0, a0, -328 -; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: sw a0, -20(s0) ; RV32I-WITHFP-NEXT: lui a0, 335544 ; RV32I-WITHFP-NEXT: addi t0, a0, 1311 ; RV32I-WITHFP-NEXT: lui a0, 688509 ; RV32I-WITHFP-NEXT: addi a5, a0, -2048 ; RV32I-WITHFP-NEXT: addi a0, zero, 1 ; RV32I-WITHFP-NEXT: addi a1, zero, 11 -; RV32I-WITHFP-NEXT: addi a2, s0, -32 +; RV32I-WITHFP-NEXT: addi a2, s0, -24 ; RV32I-WITHFP-NEXT: addi a3, zero, 12 ; RV32I-WITHFP-NEXT: addi a4, zero, 13 ; RV32I-WITHFP-NEXT: addi a6, zero, 4 ; RV32I-WITHFP-NEXT: addi a7, zero, 14 -; RV32I-WITHFP-NEXT: sw t0, -32(s0) +; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack@plt ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -134,18 +134,18 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -80 ; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd zero, 32(sp) ; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: sd zero, 16(sp) -; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: addi a0, zero, 2 -; RV64I-NEXT: sd a0, 0(sp) +; RV64I-NEXT: sd a0, 8(sp) +; RV64I-NEXT: sd zero, 64(sp) ; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: sd zero, 48(sp) -; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: addi a2, zero, 1 -; RV64I-NEXT: addi a0, sp, 32 -; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: sd a2, 32(sp) +; RV64I-NEXT: addi a0, sp, 40 +; RV64I-NEXT: addi a1, sp, 8 +; RV64I-NEXT: sd a2, 40(sp) ; RV64I-NEXT: call callee_large_scalars@plt ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 80 @@ -189,18 +189,18 @@ ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -96 ; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a0, sp, 16 +; RV64I-NEXT: addi a0, sp, 24 ; RV64I-NEXT: sd a0, 8(sp) ; RV64I-NEXT: addi a0, zero, 9 ; RV64I-NEXT: sd a0, 0(sp) +; RV64I-NEXT: sd zero, 48(sp) ; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: sd zero, 32(sp) -; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: addi a0, zero, 10 -; RV64I-NEXT: sd a0, 16(sp) +; RV64I-NEXT: sd a0, 24(sp) +; RV64I-NEXT: sd zero, 80(sp) ; RV64I-NEXT: sd zero, 72(sp) ; RV64I-NEXT: sd zero, 64(sp) -; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: addi t0, zero, 8 ; RV64I-NEXT: addi a0, zero, 1 ; RV64I-NEXT: addi a1, zero, 2 @@ -209,8 +209,8 @@ ; RV64I-NEXT: addi a4, zero, 5 ; RV64I-NEXT: addi a5, zero, 6 ; RV64I-NEXT: addi a6, zero, 7 -; RV64I-NEXT: addi a7, sp, 48 -; RV64I-NEXT: sd t0, 48(sp) +; RV64I-NEXT: addi a7, sp, 56 +; RV64I-NEXT: sd t0, 56(sp) ; RV64I-NEXT: call callee_large_scalars_exhausted_regs@plt ; RV64I-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 96 diff --git a/llvm/test/CodeGen/RISCV/fp128.ll b/llvm/test/CodeGen/RISCV/fp128.ll --- a/llvm/test/CodeGen/RISCV/fp128.ll +++ b/llvm/test/CodeGen/RISCV/fp128.ll @@ -23,16 +23,16 @@ ; RV32I-NEXT: lw a2, %lo(y+4)(a4) ; RV32I-NEXT: lw a1, %lo(y+8)(a4) ; RV32I-NEXT: lw a4, %lo(y+12)(a4) -; RV32I-NEXT: sw a4, 20(sp) -; RV32I-NEXT: sw a1, 16(sp) -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a0, 36(sp) -; RV32I-NEXT: sw a3, 32(sp) -; RV32I-NEXT: sw a7, 28(sp) -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: sw a6, 24(sp) +; RV32I-NEXT: sw a4, 24(sp) +; RV32I-NEXT: sw a1, 20(sp) +; RV32I-NEXT: sw a2, 16(sp) +; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 40(sp) +; RV32I-NEXT: sw a3, 36(sp) +; RV32I-NEXT: sw a7, 32(sp) +; RV32I-NEXT: addi a0, sp, 28 +; RV32I-NEXT: addi a1, sp, 12 +; RV32I-NEXT: sw a6, 28(sp) ; RV32I-NEXT: call __netf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -807,15 +807,15 @@ ; ; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: ; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -256 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX4-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX4-NEXT: addi sp, sp, -192 +; LMULMAX4-NEXT: .cfi_def_cfa_offset 192 +; LMULMAX4-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; LMULMAX4-NEXT: sd s0, 176(sp) # 8-byte Folded Spill ; LMULMAX4-NEXT: .cfi_offset ra, -8 ; LMULMAX4-NEXT: .cfi_offset s0, -16 -; LMULMAX4-NEXT: addi s0, sp, 256 +; LMULMAX4-NEXT: addi s0, sp, 192 ; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX4-NEXT: andi sp, sp, -128 +; LMULMAX4-NEXT: andi sp, sp, -64 ; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu ; LMULMAX4-NEXT: vle32.v v28, (a0) ; LMULMAX4-NEXT: addi a0, a0, 64 @@ -828,23 +828,23 @@ ; LMULMAX4-NEXT: vmv4r.v v8, v28 ; LMULMAX4-NEXT: vmv4r.v v12, v24 ; LMULMAX4-NEXT: call ext3@plt -; LMULMAX4-NEXT: addi sp, s0, -256 -; LMULMAX4-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 256 +; LMULMAX4-NEXT: addi sp, s0, -192 +; LMULMAX4-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; LMULMAX4-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; LMULMAX4-NEXT: addi sp, sp, 192 ; LMULMAX4-NEXT: ret ; ; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi sp, sp, -256 -; LMULMAX2-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX2-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX2-NEXT: addi sp, sp, -160 +; LMULMAX2-NEXT: .cfi_def_cfa_offset 160 +; LMULMAX2-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; LMULMAX2-NEXT: sd s0, 144(sp) # 8-byte Folded Spill ; LMULMAX2-NEXT: .cfi_offset ra, -8 ; LMULMAX2-NEXT: .cfi_offset s0, -16 -; LMULMAX2-NEXT: addi s0, sp, 256 +; LMULMAX2-NEXT: addi s0, sp, 160 ; LMULMAX2-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX2-NEXT: andi sp, sp, -128 +; LMULMAX2-NEXT: andi sp, sp, -32 ; LMULMAX2-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; LMULMAX2-NEXT: vle32.v v26, (a0) ; LMULMAX2-NEXT: addi a1, a0, 32 @@ -867,23 +867,18 @@ ; LMULMAX2-NEXT: vmv2r.v v12, v30 ; LMULMAX2-NEXT: vmv2r.v v14, v24 ; LMULMAX2-NEXT: call ext3@plt -; LMULMAX2-NEXT: addi sp, s0, -256 -; LMULMAX2-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: addi sp, sp, 256 +; LMULMAX2-NEXT: addi sp, s0, -160 +; LMULMAX2-NEXT: ld s0, 144(sp) # 8-byte Folded Reload +; LMULMAX2-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; LMULMAX2-NEXT: addi sp, sp, 160 ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi sp, sp, -384 -; LMULMAX1-NEXT: .cfi_def_cfa_offset 384 -; LMULMAX1-NEXT: sd ra, 376(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: sd s0, 368(sp) # 8-byte Folded Spill +; LMULMAX1-NEXT: addi sp, sp, -160 +; LMULMAX1-NEXT: .cfi_def_cfa_offset 160 +; LMULMAX1-NEXT: sd ra, 152(sp) # 8-byte Folded Spill ; LMULMAX1-NEXT: .cfi_offset ra, -8 -; LMULMAX1-NEXT: .cfi_offset s0, -16 -; LMULMAX1-NEXT: addi s0, sp, 384 -; LMULMAX1-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX1-NEXT: andi sp, sp, -128 ; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu ; LMULMAX1-NEXT: vle32.v v25, (a0) ; LMULMAX1-NEXT: addi a1, a0, 16 @@ -900,26 +895,26 @@ ; LMULMAX1-NEXT: vle32.v v31, (a1) ; LMULMAX1-NEXT: addi a0, a0, 112 ; LMULMAX1-NEXT: vle32.v v24, (a0) -; LMULMAX1-NEXT: ld a1, 0(s0) -; LMULMAX1-NEXT: addi a0, sp, 240 +; LMULMAX1-NEXT: ld a1, 160(sp) +; LMULMAX1-NEXT: addi a0, sp, 128 ; LMULMAX1-NEXT: vse32.v v15, (a0) -; LMULMAX1-NEXT: addi a0, sp, 224 +; LMULMAX1-NEXT: addi a0, sp, 112 ; LMULMAX1-NEXT: vse32.v v14, (a0) -; LMULMAX1-NEXT: addi a0, sp, 208 +; LMULMAX1-NEXT: addi a0, sp, 96 ; LMULMAX1-NEXT: vse32.v v13, (a0) -; LMULMAX1-NEXT: addi a0, sp, 192 +; LMULMAX1-NEXT: addi a0, sp, 80 ; LMULMAX1-NEXT: vse32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, sp, 176 +; LMULMAX1-NEXT: addi a0, sp, 64 ; LMULMAX1-NEXT: vse32.v v11, (a0) -; LMULMAX1-NEXT: addi a0, sp, 160 +; LMULMAX1-NEXT: addi a0, sp, 48 ; LMULMAX1-NEXT: vse32.v v10, (a0) -; LMULMAX1-NEXT: addi a0, sp, 144 +; LMULMAX1-NEXT: addi a0, sp, 32 ; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: addi a0, sp, 128 +; LMULMAX1-NEXT: addi a0, sp, 16 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: addi a0, zero, 42 ; LMULMAX1-NEXT: sd a0, 8(sp) -; LMULMAX1-NEXT: addi a0, sp, 128 +; LMULMAX1-NEXT: addi a0, sp, 16 ; LMULMAX1-NEXT: sd a1, 0(sp) ; LMULMAX1-NEXT: vmv1r.v v8, v25 ; LMULMAX1-NEXT: vmv1r.v v9, v26 @@ -930,10 +925,8 @@ ; LMULMAX1-NEXT: vmv1r.v v14, v31 ; LMULMAX1-NEXT: vmv1r.v v15, v24 ; LMULMAX1-NEXT: call ext3@plt -; LMULMAX1-NEXT: addi sp, s0, -384 -; LMULMAX1-NEXT: ld s0, 368(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: ld ra, 376(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: addi sp, sp, 384 +; LMULMAX1-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; LMULMAX1-NEXT: addi sp, sp, 160 ; LMULMAX1-NEXT: ret %t = call <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42) ret <32 x i32> %t @@ -1042,15 +1035,15 @@ ; ; LMULMAX4-LABEL: call_split_vector_args: ; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -256 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX4-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX4-NEXT: addi sp, sp, -192 +; LMULMAX4-NEXT: .cfi_def_cfa_offset 192 +; LMULMAX4-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; LMULMAX4-NEXT: sd s0, 176(sp) # 8-byte Folded Spill ; LMULMAX4-NEXT: .cfi_offset ra, -8 ; LMULMAX4-NEXT: .cfi_offset s0, -16 -; LMULMAX4-NEXT: addi s0, sp, 256 +; LMULMAX4-NEXT: addi s0, sp, 192 ; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX4-NEXT: andi sp, sp, -128 +; LMULMAX4-NEXT: andi sp, sp, -64 ; LMULMAX4-NEXT: vsetivli a2, 2, e32,m1,ta,mu ; LMULMAX4-NEXT: vle32.v v8, (a0) ; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu @@ -1066,23 +1059,23 @@ ; LMULMAX4-NEXT: vmv1r.v v11, v8 ; LMULMAX4-NEXT: vmv1r.v v12, v8 ; LMULMAX4-NEXT: call split_vector_args@plt -; LMULMAX4-NEXT: addi sp, s0, -256 -; LMULMAX4-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 256 +; LMULMAX4-NEXT: addi sp, s0, -192 +; LMULMAX4-NEXT: ld s0, 176(sp) # 8-byte Folded Reload +; LMULMAX4-NEXT: ld ra, 184(sp) # 8-byte Folded Reload +; LMULMAX4-NEXT: addi sp, sp, 192 ; LMULMAX4-NEXT: ret ; ; LMULMAX2-LABEL: call_split_vector_args: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi sp, sp, -256 -; LMULMAX2-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX2-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX2-NEXT: addi sp, sp, -128 +; LMULMAX2-NEXT: .cfi_def_cfa_offset 128 +; LMULMAX2-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; LMULMAX2-NEXT: sd s0, 112(sp) # 8-byte Folded Spill ; LMULMAX2-NEXT: .cfi_offset ra, -8 ; LMULMAX2-NEXT: .cfi_offset s0, -16 -; LMULMAX2-NEXT: addi s0, sp, 256 +; LMULMAX2-NEXT: addi s0, sp, 128 ; LMULMAX2-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX2-NEXT: andi sp, sp, -128 +; LMULMAX2-NEXT: andi sp, sp, -32 ; LMULMAX2-NEXT: vsetivli a2, 2, e32,m1,ta,mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vsetivli a0, 8, e32,m2,ta,mu @@ -1105,23 +1098,18 @@ ; LMULMAX2-NEXT: vmv1r.v v12, v8 ; LMULMAX2-NEXT: vmv2r.v v22, v14 ; LMULMAX2-NEXT: call split_vector_args@plt -; LMULMAX2-NEXT: addi sp, s0, -256 -; LMULMAX2-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: addi sp, sp, 256 +; LMULMAX2-NEXT: addi sp, s0, -128 +; LMULMAX2-NEXT: ld s0, 112(sp) # 8-byte Folded Reload +; LMULMAX2-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; LMULMAX2-NEXT: addi sp, sp, 128 ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: call_split_vector_args: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi sp, sp, -256 -; LMULMAX1-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX1-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX1-NEXT: addi sp, sp, -96 +; LMULMAX1-NEXT: .cfi_def_cfa_offset 96 +; LMULMAX1-NEXT: sd ra, 88(sp) # 8-byte Folded Spill ; LMULMAX1-NEXT: .cfi_offset ra, -8 -; LMULMAX1-NEXT: .cfi_offset s0, -16 -; LMULMAX1-NEXT: addi s0, sp, 256 -; LMULMAX1-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX1-NEXT: andi sp, sp, -128 ; LMULMAX1-NEXT: vsetivli a2, 2, e32,m1,ta,mu ; LMULMAX1-NEXT: vle32.v v8, (a0) ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu @@ -1158,10 +1146,8 @@ ; LMULMAX1-NEXT: vmv1r.v v22, v14 ; LMULMAX1-NEXT: vmv1r.v v23, v15 ; LMULMAX1-NEXT: call split_vector_args@plt -; LMULMAX1-NEXT: addi sp, s0, -256 -; LMULMAX1-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: addi sp, sp, 256 +; LMULMAX1-NEXT: ld ra, 88(sp) # 8-byte Folded Reload +; LMULMAX1-NEXT: addi sp, sp, 96 ; LMULMAX1-NEXT: ret %a = load <2 x i32>, <2 x i32>* %pa %b = load <32 x i32>, <32 x i32>* %pb diff --git a/llvm/test/CodeGen/RISCV/stack-slot-size.ll b/llvm/test/CodeGen/RISCV/stack-slot-size.ll --- a/llvm/test/CodeGen/RISCV/stack-slot-size.ll +++ b/llvm/test/CodeGen/RISCV/stack-slot-size.ll @@ -13,7 +13,6 @@ declare void @callee160(i160) declare void @callee161(i161) -; FIXME: Stack write clobbers the spilled value (on RV64). define i32 @caller129() nounwind { ; RV32I-LABEL: caller129: ; RV32I: # %bb.0: @@ -21,12 +20,12 @@ ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: addi a0, zero, 42 ; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) ; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: addi a0, sp, 4 ; RV32I-NEXT: sw zero, 4(sp) -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: sw zero, 0(sp) ; RV32I-NEXT: call callee129@plt ; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -35,18 +34,18 @@ ; ; RV64I-LABEL: caller129: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: addi a0, zero, 42 -; RV64I-NEXT: sw a0, 20(sp) +; RV64I-NEXT: sw a0, 36(sp) +; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: addi a0, sp, 8 ; RV64I-NEXT: sd zero, 8(sp) -; RV64I-NEXT: mv a0, sp -; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: call callee129@plt -; RV64I-NEXT: lw a0, 20(sp) -; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: lw a0, 36(sp) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %1 = alloca i32 store i32 42, i32* %1 @@ -55,7 +54,6 @@ ret i32 %2 } -; FIXME: Stack write clobbers the spilled value (on RV64). define i32 @caller160() nounwind { ; RV32I-LABEL: caller160: ; RV32I: # %bb.0: @@ -63,12 +61,12 @@ ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: addi a0, zero, 42 ; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 16(sp) ; RV32I-NEXT: sw zero, 12(sp) ; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: addi a0, sp, 4 ; RV32I-NEXT: sw zero, 4(sp) -; RV32I-NEXT: mv a0, sp -; RV32I-NEXT: sw zero, 0(sp) ; RV32I-NEXT: call callee160@plt ; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -77,18 +75,18 @@ ; ; RV64I-LABEL: caller160: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: addi a0, zero, 42 -; RV64I-NEXT: sw a0, 20(sp) +; RV64I-NEXT: sw a0, 36(sp) +; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: addi a0, sp, 8 ; RV64I-NEXT: sd zero, 8(sp) -; RV64I-NEXT: mv a0, sp -; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: call callee160@plt -; RV64I-NEXT: lw a0, 20(sp) -; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: lw a0, 36(sp) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %1 = alloca i32 store i32 42, i32* %1 @@ -123,10 +121,10 @@ ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: addi a0, zero, 42 ; RV64I-NEXT: sw a0, 36(sp) +; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: addi a0, sp, 8 ; RV64I-NEXT: sd zero, 8(sp) -; RV64I-NEXT: mv a0, sp -; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: call callee161@plt ; RV64I-NEXT: lw a0, 36(sp) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -1394,8 +1394,8 @@ ; should only be 4-byte aligned ; ILP32-ILP32F-FPELIM-LABEL: va5_aligned_stack_caller: ; ILP32-ILP32F-FPELIM: # %bb.0: -; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -64 -; ILP32-ILP32F-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -48 +; ILP32-ILP32F-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; ILP32-ILP32F-FPELIM-NEXT: addi a0, zero, 17 ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 24(sp) ; ILP32-ILP32F-FPELIM-NEXT: addi a0, zero, 16 @@ -1412,27 +1412,27 @@ ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 0(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 262153 ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, 491 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 44(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a0, 40(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 545260 ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, -1967 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a0, 36(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 964690 ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, -328 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 36(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a0, 32(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 335544 ; ILP32-ILP32F-FPELIM-NEXT: addi a5, a0, 1311 ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 688509 ; ILP32-ILP32F-FPELIM-NEXT: addi a6, a0, -2048 ; ILP32-ILP32F-FPELIM-NEXT: addi a0, zero, 1 ; ILP32-ILP32F-FPELIM-NEXT: addi a1, zero, 11 -; ILP32-ILP32F-FPELIM-NEXT: addi a2, sp, 32 +; ILP32-ILP32F-FPELIM-NEXT: addi a2, sp, 28 ; ILP32-ILP32F-FPELIM-NEXT: addi a3, zero, 12 ; ILP32-ILP32F-FPELIM-NEXT: addi a4, zero, 13 ; ILP32-ILP32F-FPELIM-NEXT: addi a7, zero, 4 -; ILP32-ILP32F-FPELIM-NEXT: sw a5, 32(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a5, 28(sp) ; ILP32-ILP32F-FPELIM-NEXT: call va5_aligned_stack_callee@plt -; ILP32-ILP32F-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 64 +; ILP32-ILP32F-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 48 ; ILP32-ILP32F-FPELIM-NEXT: ret ; ; ILP32-ILP32F-WITHFP-LABEL: va5_aligned_stack_caller: @@ -1457,24 +1457,24 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw a0, 0(sp) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 262153 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, 491 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -20(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 545260 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -1967 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a0, -16(s0) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 964690 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -328 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -28(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a0, -20(s0) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 335544 ; ILP32-ILP32F-WITHFP-NEXT: addi a5, a0, 1311 ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 688509 ; ILP32-ILP32F-WITHFP-NEXT: addi a6, a0, -2048 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, zero, 1 ; ILP32-ILP32F-WITHFP-NEXT: addi a1, zero, 11 -; ILP32-ILP32F-WITHFP-NEXT: addi a2, s0, -32 +; ILP32-ILP32F-WITHFP-NEXT: addi a2, s0, -24 ; ILP32-ILP32F-WITHFP-NEXT: addi a3, zero, 12 ; ILP32-ILP32F-WITHFP-NEXT: addi a4, zero, 13 ; ILP32-ILP32F-WITHFP-NEXT: addi a7, zero, 4 -; ILP32-ILP32F-WITHFP-NEXT: sw a5, -32(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a5, -24(s0) ; ILP32-ILP32F-WITHFP-NEXT: call va5_aligned_stack_callee@plt ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -1483,8 +1483,8 @@ ; ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-LABEL: va5_aligned_stack_caller: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -64 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -48 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 262236 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 655 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 12(sp) @@ -1501,27 +1501,27 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 0(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 262153 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 491 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 44(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 40(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 545260 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, -1967 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 36(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 964690 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, -328 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 36(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 32(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 335544 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, a0, 1311 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 688509 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a6, a0, -2048 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, zero, 1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, zero, 11 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a2, sp, 32 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a2, sp, 28 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, zero, 12 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a4, zero, 13 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a7, zero, 4 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 32(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 28(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: call va5_aligned_stack_callee@plt -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: ret ; ; LP64-LP64F-LP64D-FPELIM-LABEL: va5_aligned_stack_caller: diff --git a/llvm/test/CodeGen/RISCV/vector-abi.ll b/llvm/test/CodeGen/RISCV/vector-abi.ll --- a/llvm/test/CodeGen/RISCV/vector-abi.ll +++ b/llvm/test/CodeGen/RISCV/vector-abi.ll @@ -1,15 +1,12 @@ ; RUN: llc -mtriple=riscv32 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV32 ; RUN: llc -mtriple=riscv64 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV64 -; FIXME: The stack location used to pass the parameter to the function has the -; incorrect size and alignment for how we use it, and we clobber the stack. - declare void @callee(<4 x i8> %v) define void @caller() { ; RV32-LABEL: name: caller ; RV32: stack: - ; RV32: - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4, + ; RV32: - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 4, ; RV32-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, ; RV32-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } ; RV32: bb.0 (%ir-block.0): @@ -29,7 +26,7 @@ ; RV32: PseudoRET ; RV64-LABEL: name: caller ; RV64: stack: - ; RV64: - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4, + ; RV64: - { id: 0, name: '', type: default, offset: 0, size: 32, alignment: 8, ; RV64-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, ; RV64-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } ; RV64: bb.0 (%ir-block.0):