diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -6090,12 +6090,13 @@ bool IsVarArg = CLI.IsVarArg; EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); + LLVMContext &Ctx = *DAG.getContext(); MachineFunction &MF = DAG.getMachineFunction(); // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); if (CallConv == CallingConv::Fast) ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); @@ -6188,7 +6189,21 @@ // For now, only handle fully promoted and indirect arguments. if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. - SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT); + unsigned ArgIndex = Outs[i].OrigArgIndex; + EVT SlotVT; + if (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { + // Allocate the full stack space for a promoted (and split) argument. + Type *OrigArgType = CLI.Args[Outs[i].OrigArgIndex].Ty; + EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); + MVT PartVT = + getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + unsigned N = + getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); + } else { + SlotVT = Outs[i].ArgVT; + } + SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); int FI = cast(SpillSlot)->getIndex(); MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, SpillSlot, @@ -6198,7 +6213,6 @@ // Vectors may be partly split to registers and partly to the stack, in // which case the base address is partly offset and subsequent stores are // relative to that. - unsigned ArgIndex = Outs[i].OrigArgIndex; unsigned ArgPartOffset = Outs[i].PartOffset; assert(VA.getValVT().isVector() || ArgPartOffset == 0); while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) { diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll @@ -161,25 +161,25 @@ ; RV32I-FPELIM-NEXT: sw a0, 0(sp) ; RV32I-FPELIM-NEXT: lui a0, 262153 ; RV32I-FPELIM-NEXT: addi a0, a0, 491 -; RV32I-FPELIM-NEXT: sw a0, 44(sp) +; RV32I-FPELIM-NEXT: sw a0, 52(sp) ; RV32I-FPELIM-NEXT: lui a0, 545260 ; RV32I-FPELIM-NEXT: addi a0, a0, -1967 -; RV32I-FPELIM-NEXT: sw a0, 40(sp) +; RV32I-FPELIM-NEXT: sw a0, 48(sp) ; RV32I-FPELIM-NEXT: lui a0, 964690 ; RV32I-FPELIM-NEXT: addi a0, a0, -328 -; RV32I-FPELIM-NEXT: sw a0, 36(sp) +; RV32I-FPELIM-NEXT: sw a0, 44(sp) ; RV32I-FPELIM-NEXT: lui a0, 335544 ; RV32I-FPELIM-NEXT: addi t0, a0, 1311 ; RV32I-FPELIM-NEXT: lui a0, 688509 ; RV32I-FPELIM-NEXT: addi a5, a0, -2048 ; RV32I-FPELIM-NEXT: addi a0, zero, 1 ; RV32I-FPELIM-NEXT: addi a1, zero, 11 -; RV32I-FPELIM-NEXT: addi a2, sp, 32 +; RV32I-FPELIM-NEXT: addi a2, sp, 40 ; RV32I-FPELIM-NEXT: addi a3, zero, 12 ; RV32I-FPELIM-NEXT: addi a4, zero, 13 ; RV32I-FPELIM-NEXT: addi a6, zero, 4 ; RV32I-FPELIM-NEXT: addi a7, zero, 14 -; RV32I-FPELIM-NEXT: sw t0, 32(sp) +; RV32I-FPELIM-NEXT: sw t0, 40(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack@plt ; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -207,25 +207,25 @@ ; RV32I-WITHFP-NEXT: sw a0, 0(sp) ; RV32I-WITHFP-NEXT: lui a0, 262153 ; RV32I-WITHFP-NEXT: addi a0, a0, 491 -; RV32I-WITHFP-NEXT: sw a0, -20(s0) +; RV32I-WITHFP-NEXT: sw a0, -12(s0) ; RV32I-WITHFP-NEXT: lui a0, 545260 ; RV32I-WITHFP-NEXT: addi a0, a0, -1967 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: sw a0, -16(s0) ; RV32I-WITHFP-NEXT: lui a0, 964690 ; RV32I-WITHFP-NEXT: addi a0, a0, -328 -; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: sw a0, -20(s0) ; RV32I-WITHFP-NEXT: lui a0, 335544 ; RV32I-WITHFP-NEXT: addi t0, a0, 1311 ; RV32I-WITHFP-NEXT: lui a0, 688509 ; RV32I-WITHFP-NEXT: addi a5, a0, -2048 ; RV32I-WITHFP-NEXT: addi a0, zero, 1 ; RV32I-WITHFP-NEXT: addi a1, zero, 11 -; RV32I-WITHFP-NEXT: addi a2, s0, -32 +; RV32I-WITHFP-NEXT: addi a2, s0, -24 ; RV32I-WITHFP-NEXT: addi a3, zero, 12 ; RV32I-WITHFP-NEXT: addi a4, zero, 13 ; RV32I-WITHFP-NEXT: addi a6, zero, 4 ; RV32I-WITHFP-NEXT: addi a7, zero, 14 -; RV32I-WITHFP-NEXT: sw t0, -32(s0) +; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack@plt ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -248,16 +248,16 @@ ; RV32I-FPELIM-NEXT: addi sp, sp, -48 ; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-FPELIM-NEXT: lui a0, 524272 -; RV32I-FPELIM-NEXT: sw a0, 12(sp) +; RV32I-FPELIM-NEXT: sw a0, 20(sp) +; RV32I-FPELIM-NEXT: sw zero, 16(sp) +; RV32I-FPELIM-NEXT: sw zero, 12(sp) ; RV32I-FPELIM-NEXT: sw zero, 8(sp) -; RV32I-FPELIM-NEXT: sw zero, 4(sp) -; RV32I-FPELIM-NEXT: sw zero, 0(sp) ; RV32I-FPELIM-NEXT: sw zero, 36(sp) ; RV32I-FPELIM-NEXT: sw zero, 32(sp) ; RV32I-FPELIM-NEXT: sw zero, 28(sp) ; RV32I-FPELIM-NEXT: addi a2, zero, 1 ; RV32I-FPELIM-NEXT: addi a0, sp, 24 -; RV32I-FPELIM-NEXT: mv a1, sp +; RV32I-FPELIM-NEXT: addi a1, sp, 8 ; RV32I-FPELIM-NEXT: sw a2, 24(sp) ; RV32I-FPELIM-NEXT: call callee_large_scalars@plt ; RV32I-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -271,16 +271,16 @@ ; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 48 ; RV32I-WITHFP-NEXT: lui a0, 524272 -; RV32I-WITHFP-NEXT: sw a0, -36(s0) +; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: sw zero, -32(s0) +; RV32I-WITHFP-NEXT: sw zero, -36(s0) ; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw zero, -44(s0) -; RV32I-WITHFP-NEXT: sw zero, -48(s0) ; RV32I-WITHFP-NEXT: sw zero, -12(s0) ; RV32I-WITHFP-NEXT: sw zero, -16(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) ; RV32I-WITHFP-NEXT: addi a2, zero, 1 ; RV32I-WITHFP-NEXT: addi a0, s0, -24 -; RV32I-WITHFP-NEXT: addi a1, s0, -48 +; RV32I-WITHFP-NEXT: addi a1, s0, -40 ; RV32I-WITHFP-NEXT: sw a2, -24(s0) ; RV32I-WITHFP-NEXT: call callee_large_scalars@plt ; RV32I-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload @@ -353,20 +353,20 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-FPELIM-LABEL: caller_large_scalars_exhausted_regs: ; RV32I-FPELIM: # %bb.0: -; RV32I-FPELIM-NEXT: addi sp, sp, -64 -; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: addi a0, sp, 16 +; RV32I-FPELIM-NEXT: addi sp, sp, -48 +; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-FPELIM-NEXT: addi a0, sp, 8 ; RV32I-FPELIM-NEXT: sw a0, 4(sp) ; RV32I-FPELIM-NEXT: addi a0, zero, 9 ; RV32I-FPELIM-NEXT: sw a0, 0(sp) ; RV32I-FPELIM-NEXT: lui a0, 524272 -; RV32I-FPELIM-NEXT: sw a0, 28(sp) -; RV32I-FPELIM-NEXT: sw zero, 24(sp) -; RV32I-FPELIM-NEXT: sw zero, 20(sp) +; RV32I-FPELIM-NEXT: sw a0, 20(sp) ; RV32I-FPELIM-NEXT: sw zero, 16(sp) -; RV32I-FPELIM-NEXT: sw zero, 52(sp) -; RV32I-FPELIM-NEXT: sw zero, 48(sp) -; RV32I-FPELIM-NEXT: sw zero, 44(sp) +; RV32I-FPELIM-NEXT: sw zero, 12(sp) +; RV32I-FPELIM-NEXT: sw zero, 8(sp) +; RV32I-FPELIM-NEXT: sw zero, 36(sp) +; RV32I-FPELIM-NEXT: sw zero, 32(sp) +; RV32I-FPELIM-NEXT: sw zero, 28(sp) ; RV32I-FPELIM-NEXT: addi t0, zero, 8 ; RV32I-FPELIM-NEXT: addi a0, zero, 1 ; RV32I-FPELIM-NEXT: addi a1, zero, 2 @@ -375,28 +375,28 @@ ; RV32I-FPELIM-NEXT: addi a4, zero, 5 ; RV32I-FPELIM-NEXT: addi a5, zero, 6 ; RV32I-FPELIM-NEXT: addi a6, zero, 7 -; RV32I-FPELIM-NEXT: addi a7, sp, 40 -; RV32I-FPELIM-NEXT: sw t0, 40(sp) +; RV32I-FPELIM-NEXT: addi a7, sp, 24 +; RV32I-FPELIM-NEXT: sw t0, 24(sp) ; RV32I-FPELIM-NEXT: call callee_large_scalars_exhausted_regs@plt -; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-FPELIM-NEXT: addi sp, sp, 64 +; RV32I-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-FPELIM-NEXT: addi sp, sp, 48 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: caller_large_scalars_exhausted_regs: ; RV32I-WITHFP: # %bb.0: -; RV32I-WITHFP-NEXT: addi sp, sp, -64 -; RV32I-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32I-WITHFP-NEXT: addi s0, sp, 64 -; RV32I-WITHFP-NEXT: addi a0, s0, -48 +; RV32I-WITHFP-NEXT: addi sp, sp, -48 +; RV32I-WITHFP-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-WITHFP-NEXT: addi s0, sp, 48 +; RV32I-WITHFP-NEXT: addi a0, s0, -40 ; RV32I-WITHFP-NEXT: sw a0, 4(sp) ; RV32I-WITHFP-NEXT: addi a0, zero, 9 ; RV32I-WITHFP-NEXT: sw a0, 0(sp) ; RV32I-WITHFP-NEXT: lui a0, 524272 -; RV32I-WITHFP-NEXT: sw a0, -36(s0) +; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: sw zero, -32(s0) +; RV32I-WITHFP-NEXT: sw zero, -36(s0) ; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw zero, -44(s0) -; RV32I-WITHFP-NEXT: sw zero, -48(s0) ; RV32I-WITHFP-NEXT: sw zero, -12(s0) ; RV32I-WITHFP-NEXT: sw zero, -16(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) @@ -411,9 +411,9 @@ ; RV32I-WITHFP-NEXT: addi a7, s0, -24 ; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: call callee_large_scalars_exhausted_regs@plt -; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-WITHFP-NEXT: addi sp, sp, 64 +; RV32I-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-WITHFP-NEXT: addi sp, sp, 48 ; RV32I-WITHFP-NEXT: ret %1 = call i32 @callee_large_scalars_exhausted_regs( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 8, i32 9, @@ -682,25 +682,25 @@ ; RV32I-FPELIM-NEXT: sw a0, 0(sp) ; RV32I-FPELIM-NEXT: lui a0, 262153 ; RV32I-FPELIM-NEXT: addi a0, a0, 491 -; RV32I-FPELIM-NEXT: sw a0, 44(sp) +; RV32I-FPELIM-NEXT: sw a0, 52(sp) ; RV32I-FPELIM-NEXT: lui a0, 545260 ; RV32I-FPELIM-NEXT: addi a0, a0, -1967 -; RV32I-FPELIM-NEXT: sw a0, 40(sp) +; RV32I-FPELIM-NEXT: sw a0, 48(sp) ; RV32I-FPELIM-NEXT: lui a0, 964690 ; RV32I-FPELIM-NEXT: addi a0, a0, -328 -; RV32I-FPELIM-NEXT: sw a0, 36(sp) +; RV32I-FPELIM-NEXT: sw a0, 44(sp) ; RV32I-FPELIM-NEXT: lui a0, 335544 ; RV32I-FPELIM-NEXT: addi t0, a0, 1311 ; RV32I-FPELIM-NEXT: lui a0, 688509 ; RV32I-FPELIM-NEXT: addi a5, a0, -2048 ; RV32I-FPELIM-NEXT: addi a0, zero, 1 ; RV32I-FPELIM-NEXT: addi a1, zero, 11 -; RV32I-FPELIM-NEXT: addi a2, sp, 32 +; RV32I-FPELIM-NEXT: addi a2, sp, 40 ; RV32I-FPELIM-NEXT: addi a3, zero, 12 ; RV32I-FPELIM-NEXT: addi a4, zero, 13 ; RV32I-FPELIM-NEXT: addi a6, zero, 4 ; RV32I-FPELIM-NEXT: addi a7, zero, 14 -; RV32I-FPELIM-NEXT: sw t0, 32(sp) +; RV32I-FPELIM-NEXT: sw t0, 40(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack@plt ; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -725,25 +725,25 @@ ; RV32I-WITHFP-NEXT: sw a0, 0(sp) ; RV32I-WITHFP-NEXT: lui a0, 262153 ; RV32I-WITHFP-NEXT: addi a0, a0, 491 -; RV32I-WITHFP-NEXT: sw a0, -20(s0) +; RV32I-WITHFP-NEXT: sw a0, -12(s0) ; RV32I-WITHFP-NEXT: lui a0, 545260 ; RV32I-WITHFP-NEXT: addi a0, a0, -1967 -; RV32I-WITHFP-NEXT: sw a0, -24(s0) +; RV32I-WITHFP-NEXT: sw a0, -16(s0) ; RV32I-WITHFP-NEXT: lui a0, 964690 ; RV32I-WITHFP-NEXT: addi a0, a0, -328 -; RV32I-WITHFP-NEXT: sw a0, -28(s0) +; RV32I-WITHFP-NEXT: sw a0, -20(s0) ; RV32I-WITHFP-NEXT: lui a0, 335544 ; RV32I-WITHFP-NEXT: addi t0, a0, 1311 ; RV32I-WITHFP-NEXT: lui a0, 688509 ; RV32I-WITHFP-NEXT: addi a5, a0, -2048 ; RV32I-WITHFP-NEXT: addi a0, zero, 1 ; RV32I-WITHFP-NEXT: addi a1, zero, 11 -; RV32I-WITHFP-NEXT: addi a2, s0, -32 +; RV32I-WITHFP-NEXT: addi a2, s0, -24 ; RV32I-WITHFP-NEXT: addi a3, zero, 12 ; RV32I-WITHFP-NEXT: addi a4, zero, 13 ; RV32I-WITHFP-NEXT: addi a6, zero, 4 ; RV32I-WITHFP-NEXT: addi a7, zero, 14 -; RV32I-WITHFP-NEXT: sw t0, -32(s0) +; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack@plt ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -807,15 +807,10 @@ ; ; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: ; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -256 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX4-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX4-NEXT: addi sp, sp, -144 +; LMULMAX4-NEXT: .cfi_def_cfa_offset 144 +; LMULMAX4-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: .cfi_offset s0, -16 -; LMULMAX4-NEXT: addi s0, sp, 256 -; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX4-NEXT: andi sp, sp, -128 ; LMULMAX4-NEXT: vsetivli a1, 16, e32,m4,ta,mu ; LMULMAX4-NEXT: vle32.v v28, (a0) ; LMULMAX4-NEXT: addi a0, a0, 64 @@ -828,23 +823,16 @@ ; LMULMAX4-NEXT: vmv4r.v v8, v28 ; LMULMAX4-NEXT: vmv4r.v v12, v24 ; LMULMAX4-NEXT: call ext3@plt -; LMULMAX4-NEXT: addi sp, s0, -256 -; LMULMAX4-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 256 +; LMULMAX4-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; LMULMAX4-NEXT: addi sp, sp, 144 ; LMULMAX4-NEXT: ret ; ; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi sp, sp, -256 -; LMULMAX2-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX2-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX2-NEXT: addi sp, sp, -144 +; LMULMAX2-NEXT: .cfi_def_cfa_offset 144 +; LMULMAX2-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; LMULMAX2-NEXT: .cfi_offset ra, -8 -; LMULMAX2-NEXT: .cfi_offset s0, -16 -; LMULMAX2-NEXT: addi s0, sp, 256 -; LMULMAX2-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX2-NEXT: andi sp, sp, -128 ; LMULMAX2-NEXT: vsetivli a1, 8, e32,m2,ta,mu ; LMULMAX2-NEXT: vle32.v v26, (a0) ; LMULMAX2-NEXT: addi a1, a0, 32 @@ -867,23 +855,16 @@ ; LMULMAX2-NEXT: vmv2r.v v12, v30 ; LMULMAX2-NEXT: vmv2r.v v14, v24 ; LMULMAX2-NEXT: call ext3@plt -; LMULMAX2-NEXT: addi sp, s0, -256 -; LMULMAX2-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: addi sp, sp, 256 +; LMULMAX2-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; LMULMAX2-NEXT: addi sp, sp, 144 ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi sp, sp, -384 -; LMULMAX1-NEXT: .cfi_def_cfa_offset 384 -; LMULMAX1-NEXT: sd ra, 376(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: sd s0, 368(sp) # 8-byte Folded Spill +; LMULMAX1-NEXT: addi sp, sp, -160 +; LMULMAX1-NEXT: .cfi_def_cfa_offset 160 +; LMULMAX1-NEXT: sd ra, 152(sp) # 8-byte Folded Spill ; LMULMAX1-NEXT: .cfi_offset ra, -8 -; LMULMAX1-NEXT: .cfi_offset s0, -16 -; LMULMAX1-NEXT: addi s0, sp, 384 -; LMULMAX1-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX1-NEXT: andi sp, sp, -128 ; LMULMAX1-NEXT: vsetivli a1, 4, e32,m1,ta,mu ; LMULMAX1-NEXT: vle32.v v25, (a0) ; LMULMAX1-NEXT: addi a1, a0, 16 @@ -900,26 +881,26 @@ ; LMULMAX1-NEXT: vle32.v v31, (a1) ; LMULMAX1-NEXT: addi a0, a0, 112 ; LMULMAX1-NEXT: vle32.v v24, (a0) -; LMULMAX1-NEXT: ld a1, 0(s0) -; LMULMAX1-NEXT: addi a0, sp, 240 +; LMULMAX1-NEXT: ld a1, 160(sp) +; LMULMAX1-NEXT: addi a0, sp, 128 ; LMULMAX1-NEXT: vse32.v v15, (a0) -; LMULMAX1-NEXT: addi a0, sp, 224 +; LMULMAX1-NEXT: addi a0, sp, 112 ; LMULMAX1-NEXT: vse32.v v14, (a0) -; LMULMAX1-NEXT: addi a0, sp, 208 +; LMULMAX1-NEXT: addi a0, sp, 96 ; LMULMAX1-NEXT: vse32.v v13, (a0) -; LMULMAX1-NEXT: addi a0, sp, 192 +; LMULMAX1-NEXT: addi a0, sp, 80 ; LMULMAX1-NEXT: vse32.v v12, (a0) -; LMULMAX1-NEXT: addi a0, sp, 176 +; LMULMAX1-NEXT: addi a0, sp, 64 ; LMULMAX1-NEXT: vse32.v v11, (a0) -; LMULMAX1-NEXT: addi a0, sp, 160 +; LMULMAX1-NEXT: addi a0, sp, 48 ; LMULMAX1-NEXT: vse32.v v10, (a0) -; LMULMAX1-NEXT: addi a0, sp, 144 +; LMULMAX1-NEXT: addi a0, sp, 32 ; LMULMAX1-NEXT: vse32.v v9, (a0) -; LMULMAX1-NEXT: addi a0, sp, 128 +; LMULMAX1-NEXT: addi a0, sp, 16 ; LMULMAX1-NEXT: vse32.v v8, (a0) ; LMULMAX1-NEXT: addi a0, zero, 42 ; LMULMAX1-NEXT: sd a0, 8(sp) -; LMULMAX1-NEXT: addi a0, sp, 128 +; LMULMAX1-NEXT: addi a0, sp, 16 ; LMULMAX1-NEXT: sd a1, 0(sp) ; LMULMAX1-NEXT: vmv1r.v v8, v25 ; LMULMAX1-NEXT: vmv1r.v v9, v26 @@ -930,10 +911,8 @@ ; LMULMAX1-NEXT: vmv1r.v v14, v31 ; LMULMAX1-NEXT: vmv1r.v v15, v24 ; LMULMAX1-NEXT: call ext3@plt -; LMULMAX1-NEXT: addi sp, s0, -384 -; LMULMAX1-NEXT: ld s0, 368(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: ld ra, 376(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: addi sp, sp, 384 +; LMULMAX1-NEXT: ld ra, 152(sp) # 8-byte Folded Reload +; LMULMAX1-NEXT: addi sp, sp, 160 ; LMULMAX1-NEXT: ret %t = call <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42) ret <32 x i32> %t @@ -1042,15 +1021,10 @@ ; ; LMULMAX4-LABEL: call_split_vector_args: ; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -256 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX4-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX4-NEXT: addi sp, sp, -144 +; LMULMAX4-NEXT: .cfi_def_cfa_offset 144 +; LMULMAX4-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; LMULMAX4-NEXT: .cfi_offset ra, -8 -; LMULMAX4-NEXT: .cfi_offset s0, -16 -; LMULMAX4-NEXT: addi s0, sp, 256 -; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX4-NEXT: andi sp, sp, -128 ; LMULMAX4-NEXT: vsetivli a2, 2, e32,m1,ta,mu ; LMULMAX4-NEXT: vle32.v v8, (a0) ; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu @@ -1066,23 +1040,16 @@ ; LMULMAX4-NEXT: vmv1r.v v11, v8 ; LMULMAX4-NEXT: vmv1r.v v12, v8 ; LMULMAX4-NEXT: call split_vector_args@plt -; LMULMAX4-NEXT: addi sp, s0, -256 -; LMULMAX4-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 256 +; LMULMAX4-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; LMULMAX4-NEXT: addi sp, sp, 144 ; LMULMAX4-NEXT: ret ; ; LMULMAX2-LABEL: call_split_vector_args: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: addi sp, sp, -256 -; LMULMAX2-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX2-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX2-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX2-NEXT: addi sp, sp, -144 +; LMULMAX2-NEXT: .cfi_def_cfa_offset 144 +; LMULMAX2-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; LMULMAX2-NEXT: .cfi_offset ra, -8 -; LMULMAX2-NEXT: .cfi_offset s0, -16 -; LMULMAX2-NEXT: addi s0, sp, 256 -; LMULMAX2-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX2-NEXT: andi sp, sp, -128 ; LMULMAX2-NEXT: vsetivli a2, 2, e32,m1,ta,mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vsetivli a0, 8, e32,m2,ta,mu @@ -1105,23 +1072,16 @@ ; LMULMAX2-NEXT: vmv1r.v v12, v8 ; LMULMAX2-NEXT: vmv2r.v v22, v14 ; LMULMAX2-NEXT: call split_vector_args@plt -; LMULMAX2-NEXT: addi sp, s0, -256 -; LMULMAX2-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX2-NEXT: addi sp, sp, 256 +; LMULMAX2-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; LMULMAX2-NEXT: addi sp, sp, 144 ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: call_split_vector_args: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: addi sp, sp, -256 -; LMULMAX1-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX1-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX1-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX1-NEXT: addi sp, sp, -144 +; LMULMAX1-NEXT: .cfi_def_cfa_offset 144 +; LMULMAX1-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; LMULMAX1-NEXT: .cfi_offset ra, -8 -; LMULMAX1-NEXT: .cfi_offset s0, -16 -; LMULMAX1-NEXT: addi s0, sp, 256 -; LMULMAX1-NEXT: .cfi_def_cfa s0, 0 -; LMULMAX1-NEXT: andi sp, sp, -128 ; LMULMAX1-NEXT: vsetivli a2, 2, e32,m1,ta,mu ; LMULMAX1-NEXT: vle32.v v8, (a0) ; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu @@ -1158,10 +1118,8 @@ ; LMULMAX1-NEXT: vmv1r.v v22, v14 ; LMULMAX1-NEXT: vmv1r.v v23, v15 ; LMULMAX1-NEXT: call split_vector_args@plt -; LMULMAX1-NEXT: addi sp, s0, -256 -; LMULMAX1-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX1-NEXT: addi sp, sp, 256 +; LMULMAX1-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; LMULMAX1-NEXT: addi sp, sp, 144 ; LMULMAX1-NEXT: ret %a = load <2 x i32>, <2 x i32>* %pa %b = load <32 x i32>, <32 x i32>* %pb diff --git a/llvm/test/CodeGen/RISCV/stack-slot-size.ll b/llvm/test/CodeGen/RISCV/stack-slot-size.ll --- a/llvm/test/CodeGen/RISCV/stack-slot-size.ll +++ b/llvm/test/CodeGen/RISCV/stack-slot-size.ll @@ -13,7 +13,6 @@ declare void @callee160(i160) declare void @callee161(i161) -; FIXME: Stack write clobbers the spilled value (on RV64). define i32 @caller129() nounwind { ; RV32I-LABEL: caller129: ; RV32I: # %bb.0: @@ -35,18 +34,18 @@ ; ; RV64I-LABEL: caller129: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: addi a0, zero, 42 -; RV64I-NEXT: sw a0, 20(sp) +; RV64I-NEXT: sw a0, 36(sp) ; RV64I-NEXT: sd zero, 16(sp) ; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: call callee129@plt -; RV64I-NEXT: lw a0, 20(sp) -; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: lw a0, 36(sp) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %1 = alloca i32 store i32 42, i32* %1 @@ -55,7 +54,6 @@ ret i32 %2 } -; FIXME: Stack write clobbers the spilled value (on RV64). define i32 @caller160() nounwind { ; RV32I-LABEL: caller160: ; RV32I: # %bb.0: @@ -77,18 +75,18 @@ ; ; RV64I-LABEL: caller160: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: addi sp, sp, -48 +; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: addi a0, zero, 42 -; RV64I-NEXT: sw a0, 20(sp) +; RV64I-NEXT: sw a0, 36(sp) ; RV64I-NEXT: sd zero, 16(sp) ; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: call callee160@plt -; RV64I-NEXT: lw a0, 20(sp) -; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: lw a0, 36(sp) +; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 48 ; RV64I-NEXT: ret %1 = alloca i32 store i32 42, i32* %1 diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -1412,24 +1412,24 @@ ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 0(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 262153 ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, 491 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 44(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a0, 52(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 545260 ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, -1967 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a0, 48(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 964690 ; ILP32-ILP32F-FPELIM-NEXT: addi a0, a0, -328 -; ILP32-ILP32F-FPELIM-NEXT: sw a0, 36(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a0, 44(sp) ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 335544 ; ILP32-ILP32F-FPELIM-NEXT: addi a5, a0, 1311 ; ILP32-ILP32F-FPELIM-NEXT: lui a0, 688509 ; ILP32-ILP32F-FPELIM-NEXT: addi a6, a0, -2048 ; ILP32-ILP32F-FPELIM-NEXT: addi a0, zero, 1 ; ILP32-ILP32F-FPELIM-NEXT: addi a1, zero, 11 -; ILP32-ILP32F-FPELIM-NEXT: addi a2, sp, 32 +; ILP32-ILP32F-FPELIM-NEXT: addi a2, sp, 40 ; ILP32-ILP32F-FPELIM-NEXT: addi a3, zero, 12 ; ILP32-ILP32F-FPELIM-NEXT: addi a4, zero, 13 ; ILP32-ILP32F-FPELIM-NEXT: addi a7, zero, 4 -; ILP32-ILP32F-FPELIM-NEXT: sw a5, 32(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a5, 40(sp) ; ILP32-ILP32F-FPELIM-NEXT: call va5_aligned_stack_callee@plt ; ILP32-ILP32F-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 64 @@ -1457,24 +1457,24 @@ ; ILP32-ILP32F-WITHFP-NEXT: sw a0, 0(sp) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 262153 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, 491 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -20(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 545260 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -1967 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -24(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a0, -16(s0) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 964690 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, a0, -328 -; ILP32-ILP32F-WITHFP-NEXT: sw a0, -28(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a0, -20(s0) ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 335544 ; ILP32-ILP32F-WITHFP-NEXT: addi a5, a0, 1311 ; ILP32-ILP32F-WITHFP-NEXT: lui a0, 688509 ; ILP32-ILP32F-WITHFP-NEXT: addi a6, a0, -2048 ; ILP32-ILP32F-WITHFP-NEXT: addi a0, zero, 1 ; ILP32-ILP32F-WITHFP-NEXT: addi a1, zero, 11 -; ILP32-ILP32F-WITHFP-NEXT: addi a2, s0, -32 +; ILP32-ILP32F-WITHFP-NEXT: addi a2, s0, -24 ; ILP32-ILP32F-WITHFP-NEXT: addi a3, zero, 12 ; ILP32-ILP32F-WITHFP-NEXT: addi a4, zero, 13 ; ILP32-ILP32F-WITHFP-NEXT: addi a7, zero, 4 -; ILP32-ILP32F-WITHFP-NEXT: sw a5, -32(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a5, -24(s0) ; ILP32-ILP32F-WITHFP-NEXT: call va5_aligned_stack_callee@plt ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -1501,24 +1501,24 @@ ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 0(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 262153 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, 491 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 44(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 52(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 545260 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, -1967 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 48(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 964690 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, a0, -328 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 36(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 44(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 335544 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, a0, 1311 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a0, 688509 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a6, a0, -2048 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a0, zero, 1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a1, zero, 11 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a2, sp, 32 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a2, sp, 40 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a3, zero, 12 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a4, zero, 13 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a7, zero, 4 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 32(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 40(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: call va5_aligned_stack_callee@plt ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64