diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6643,6 +6643,11 @@
   return true;
 }
 
+static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
+  return DAG.getDataLayout().getPrefTypeAlign(
+      VT.getTypeForEVT(*DAG.getContext()));
+}
+
 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
 // and output parameter nodes.
 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
@@ -6757,11 +6762,10 @@
     // For now, only handle fully promoted and indirect arguments.
     if (VA.getLocInfo() == CCValAssign::Indirect) {
       // Store the argument in a stack slot and pass its address.
-      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
-      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
-      MemOpChains.push_back(
-          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
-                       MachinePointerInfo::getFixedStack(MF, FI)));
+      Align StackAlign =
+          std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
+                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
+      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
       // If the original argument was split (e.g. i128), we need
       // to store the required parts of it here (and pass just one address).
       // Vectors may be partly split to registers and partly to the stack, in
@@ -6770,15 +6774,32 @@
       unsigned ArgIndex = Outs[i].OrigArgIndex;
       unsigned ArgPartOffset = Outs[i].PartOffset;
       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
+      // Calculate the total size to store. We don't have access to what we're
+      // actually storing other than performing the loop and collecting the
+      // info.
+      SmallVector<std::pair<SDValue, unsigned>> Parts;
       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
         SDValue PartValue = OutVals[i + 1];
         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+        EVT PartVT = PartValue.getValueType();
+        StoredSize += PartVT.getStoreSize();
+        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
+        Parts.push_back(std::make_pair(PartValue, PartOffset));
+        ++i;
+      }
+      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
+      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      MemOpChains.push_back(
+          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+                       MachinePointerInfo::getFixedStack(MF, FI)));
+      for (const auto &Part : Parts) {
+        SDValue PartValue = Part.first;
+        unsigned PartOffset = Part.second;
         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                       DAG.getIntPtrConstant(PartOffset, DL));
         MemOpChains.push_back(
             DAG.getStore(Chain, DL, PartValue, Address,
                          MachinePointerInfo::getFixedStack(MF, FI)));
-        ++i;
       }
       ArgValue = SpillSlot;
     } else {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -1074,13 +1074,13 @@
 ;
 ; LMULMAX2-LABEL: call_split_vector_args:
 ; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -256
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX2-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT:    addi sp, sp, -128
+; LMULMAX2-NEXT:    .cfi_def_cfa_offset 128
+; LMULMAX2-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
 ; LMULMAX2-NEXT:    .cfi_offset ra, -8
 ; LMULMAX2-NEXT:    .cfi_offset s0, -16
-; LMULMAX2-NEXT:    addi s0, sp, 256
+; LMULMAX2-NEXT:    addi s0, sp, 128
 ; LMULMAX2-NEXT:    .cfi_def_cfa s0, 0
 ; LMULMAX2-NEXT:    andi sp, sp, -128
 ; LMULMAX2-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
@@ -1105,21 +1105,21 @@
 ; LMULMAX2-NEXT:    vmv1r.v v12, v8
 ; LMULMAX2-NEXT:    vmv2r.v v22, v14
 ; LMULMAX2-NEXT:    call split_vector_args@plt
-; LMULMAX2-NEXT:    addi sp, s0, -256
-; LMULMAX2-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 256
+; LMULMAX2-NEXT:    addi sp, s0, -128
+; LMULMAX2-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    addi sp, sp, 128
 ; LMULMAX2-NEXT:    ret
 ;
 ; LMULMAX1-LABEL: call_split_vector_args:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -256
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX1-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    addi sp, sp, -128
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 128
+; LMULMAX1-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
 ; LMULMAX1-NEXT:    .cfi_offset ra, -8
 ; LMULMAX1-NEXT:    .cfi_offset s0, -16
-; LMULMAX1-NEXT:    addi s0, sp, 256
+; LMULMAX1-NEXT:    addi s0, sp, 128
 ; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
 ; LMULMAX1-NEXT:    andi sp, sp, -128
 ; LMULMAX1-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
@@ -1158,10 +1158,10 @@
 ; LMULMAX1-NEXT:    vmv1r.v v22, v14
 ; LMULMAX1-NEXT:    vmv1r.v v23, v15
 ; LMULMAX1-NEXT:    call split_vector_args@plt
-; LMULMAX1-NEXT:    addi sp, s0, -256
-; LMULMAX1-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 256
+; LMULMAX1-NEXT:    addi sp, s0, -128
+; LMULMAX1-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    addi sp, sp, 128
 ; LMULMAX1-NEXT:    ret
   %a = load <2 x i32>, <2 x i32>* %pa
   %b = load <32 x i32>, <32 x i32>* %pb
diff --git a/llvm/test/CodeGen/RISCV/stack-slot-size.ll b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
--- a/llvm/test/CodeGen/RISCV/stack-slot-size.ll
+++ b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
@@ -13,7 +13,6 @@
 declare void @callee160(i160)
 declare void @callee161(i161)
 
-; FIXME: Stack write clobbers the spilled value (on RV64).
 define i32 @caller129() nounwind {
 ; RV32I-LABEL: caller129:
 ; RV32I:       # %bb.0:
@@ -35,18 +34,18 @@
 ;
 ; RV64I-LABEL: caller129:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a0, zero, 42
-; RV64I-NEXT:    sw a0, 20(sp)
+; RV64I-NEXT:    sw a0, 36(sp)
 ; RV64I-NEXT:    sd zero, 16(sp)
 ; RV64I-NEXT:    sd zero, 8(sp)
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    call callee129@plt
-; RV64I-NEXT:    lw a0, 20(sp)
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    lw a0, 36(sp)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = alloca i32
   store i32 42, i32* %1
@@ -55,7 +54,6 @@
   ret i32 %2
 }
 
-; FIXME: Stack write clobbers the spilled value (on RV64).
 define i32 @caller160() nounwind {
 ; RV32I-LABEL: caller160:
 ; RV32I:       # %bb.0:
@@ -77,18 +75,18 @@
 ;
 ; RV64I-LABEL: caller160:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a0, zero, 42
-; RV64I-NEXT:    sw a0, 20(sp)
+; RV64I-NEXT:    sw a0, 36(sp)
 ; RV64I-NEXT:    sd zero, 16(sp)
 ; RV64I-NEXT:    sd zero, 8(sp)
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    call callee160@plt
-; RV64I-NEXT:    lw a0, 20(sp)
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    lw a0, 36(sp)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = alloca i32
   store i32 42, i32* %1
diff --git a/llvm/test/CodeGen/RISCV/vector-abi.ll b/llvm/test/CodeGen/RISCV/vector-abi.ll
--- a/llvm/test/CodeGen/RISCV/vector-abi.ll
+++ b/llvm/test/CodeGen/RISCV/vector-abi.ll
@@ -1,15 +1,12 @@
 ; RUN: llc -mtriple=riscv32 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV32
 ; RUN: llc -mtriple=riscv64 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV64
 
-; FIXME: The stack location used to pass the parameter to the function has the
-; incorrect size and alignment for how we use it, and we clobber the stack.
-
 declare void @callee(<4 x i8> %v)
 
 define void @caller() {
   ; RV32-LABEL: name: caller
   ; RV32: stack:
-  ; RV32:   - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+  ; RV32:   - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 4,
   ; RV32-NEXT:     stack-id: default, callee-saved-register: '', callee-saved-restored: true,
   ; RV32-NEXT:     debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
   ; RV32: bb.0 (%ir-block.0):
@@ -29,7 +26,7 @@
   ; RV32: PseudoRET
   ; RV64-LABEL: name: caller
   ; RV64: stack:
-  ; RV64:   - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+  ; RV64:   - { id: 0, name: '', type: default, offset: 0, size: 32, alignment: 8,
   ; RV64-NEXT:     stack-id: default, callee-saved-register: '', callee-saved-restored: true,
   ; RV64-NEXT:     debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
   ; RV64: bb.0 (%ir-block.0):
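
To make the sizing rule concrete outside of SelectionDAG: the patch sizes the indirect
argument's stack temporary as the sum of the store sizes of all parts of the split
argument, at the maximum preferred alignment seen among them, rather than sizing it
from Outs[i].ArgVT alone. Below is a minimal standalone sketch of that computation;
the Part struct, the spillSlotFor helper, and the example values are illustrative
stand-ins (not LLVM API), with the byte sizes taken from the <4 x i8> case in the
updated vector-abi.ll checks.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    // Illustrative stand-in for one legalized part of a split argument: its
    // store size and preferred alignment in bytes. In the patch these come
    // from EVT::getStoreSize() and DataLayout::getPrefTypeAlign().
    struct Part {
      uint64_t StoreSize;
      uint64_t PrefAlign;
    };

    // Mirrors the accumulation loop added to LowerCall: the stack temporary
    // must hold every part back-to-back and satisfy the largest preferred
    // alignment, instead of being sized from the first part's type alone.
    static std::pair<uint64_t, uint64_t>
    spillSlotFor(const std::vector<Part> &Parts) {
      uint64_t Size = 0;
      uint64_t Align = 1;
      for (const Part &P : Parts) {
        Size += P.StoreSize;                  // total bytes to be stored
        Align = std::max(Align, P.PrefAlign); // strictest alignment wins
      }
      return {Size, Align};
    }

    int main() {
      // <4 x i8> passed indirectly on RV64 is split into four XLen (i64)
      // parts, so the slot must be 32 bytes at 8-byte alignment -- matching
      // the updated vector-abi.ll expectation (size: 32, alignment: 8),
      // where the old code allocated only a 4-byte slot.
      std::vector<Part> V4I8OnRV64(4, Part{/*StoreSize=*/8, /*PrefAlign=*/8});
      auto [Size, Align] = spillSlotFor(V4I8OnRV64);
      std::cout << "size=" << Size << ", align=" << Align << "\n";
      assert(Size == 32 && Align == 8);
      return 0;
    }

On RV32 the same vector yields four 4-byte parts, i.e. a 16-byte slot at 4-byte
alignment, which is what the updated RV32 check line expects.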