diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6188,11 +6188,9 @@
     // For now, only handle fully promoted and indirect arguments.
     if (VA.getLocInfo() == CCValAssign::Indirect) {
       // Store the argument in a stack slot and pass its address.
-      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
-      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
-      MemOpChains.push_back(
-          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
-                       MachinePointerInfo::getFixedStack(MF, FI)));
+      Align StackAlign = DAG.getEVTAlign(ArgValue.getValueType());
+      StackAlign = std::max(DAG.getEVTAlign(Outs[i].ArgVT), StackAlign);
+      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
       // If the original argument was split (e.g. i128), we need
       // to store the required parts of it here (and pass just one address).
       // Vectors may be partly split to registers and partly to the stack, in
@@ -6201,15 +6199,32 @@
       unsigned ArgIndex = Outs[i].OrigArgIndex;
       unsigned ArgPartOffset = Outs[i].PartOffset;
       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
+      // Calculate the total size to store. We don't have access to what we're
+      // actually storing other than performing the loop and collecting the
+      // info.
+      SmallVector<std::pair<SDValue, unsigned>> Parts;
       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
         SDValue PartValue = OutVals[i + 1];
         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+        EVT PartVT = PartValue.getValueType();
+        StoredSize += PartVT.getStoreSize();
+        StackAlign = std::max(DAG.getEVTAlign(PartVT), StackAlign);
+        Parts.push_back(std::make_pair(PartValue, PartOffset));
+        ++i;
+      }
+      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
+      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      MemOpChains.push_back(
+          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+                       MachinePointerInfo::getFixedStack(MF, FI)));
+      for (const auto &Part : Parts) {
+        SDValue PartValue = Part.first;
+        unsigned PartOffset = Part.second;
         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                       DAG.getIntPtrConstant(PartOffset, DL));
         MemOpChains.push_back(
             DAG.getStore(Chain, DL, PartValue, Address,
                          MachinePointerInfo::getFixedStack(MF, FI)));
-        ++i;
       }
       ArgValue = SpillSlot;
     } else {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -1074,13 +1074,13 @@
 ;
 ; LMULMAX2-LABEL: call_split_vector_args:
 ; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -256
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX2-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT:    addi sp, sp, -128
+; LMULMAX2-NEXT:    .cfi_def_cfa_offset 128
+; LMULMAX2-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
 ; LMULMAX2-NEXT:    .cfi_offset ra, -8
 ; LMULMAX2-NEXT:    .cfi_offset s0, -16
-; LMULMAX2-NEXT:    addi s0, sp, 256
+; LMULMAX2-NEXT:    addi s0, sp, 128
 ; LMULMAX2-NEXT:    .cfi_def_cfa s0, 0
 ; LMULMAX2-NEXT:    andi sp, sp, -128
 ; LMULMAX2-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
@@ -1105,21 +1105,21 @@
 ; LMULMAX2-NEXT:    vmv1r.v v12, v8
 ; LMULMAX2-NEXT:    vmv2r.v v22, v14
 ; LMULMAX2-NEXT:    call split_vector_args@plt
-; LMULMAX2-NEXT:    addi sp, s0, -256
-; LMULMAX2-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 256
+; LMULMAX2-NEXT:    addi sp, s0, -128
+; LMULMAX2-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    addi sp, sp, 128
 ; LMULMAX2-NEXT:    ret
 ;
 ; LMULMAX1-LABEL: call_split_vector_args:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -256
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX1-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    addi sp, sp, -128
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 128
+; LMULMAX1-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
 ; LMULMAX1-NEXT:    .cfi_offset ra, -8
 ; LMULMAX1-NEXT:    .cfi_offset s0, -16
-; LMULMAX1-NEXT:    addi s0, sp, 256
+; LMULMAX1-NEXT:    addi s0, sp, 128
 ; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
 ; LMULMAX1-NEXT:    andi sp, sp, -128
 ; LMULMAX1-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
@@ -1158,10 +1158,10 @@
 ; LMULMAX1-NEXT:    vmv1r.v v22, v14
 ; LMULMAX1-NEXT:    vmv1r.v v23, v15
 ; LMULMAX1-NEXT:    call split_vector_args@plt
-; LMULMAX1-NEXT:    addi sp, s0, -256
-; LMULMAX1-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 256
+; LMULMAX1-NEXT:    addi sp, s0, -128
+; LMULMAX1-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    addi sp, sp, 128
 ; LMULMAX1-NEXT:    ret
   %a = load <2 x i32>, <2 x i32>* %pa
   %b = load <32 x i32>, <32 x i32>* %pb
diff --git a/llvm/test/CodeGen/RISCV/stack-slot-size.ll b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
--- a/llvm/test/CodeGen/RISCV/stack-slot-size.ll
+++ b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
@@ -13,7 +13,6 @@
 declare void @callee160(i160)
 declare void @callee161(i161)
 
-; FIXME: Stack write clobbers the spilled value (on RV64).
 define i32 @caller129() nounwind {
 ; RV32I-LABEL: caller129:
 ; RV32I:       # %bb.0:
@@ -35,18 +34,18 @@
 ;
 ; RV64I-LABEL: caller129:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a0, zero, 42
-; RV64I-NEXT:    sw a0, 20(sp)
+; RV64I-NEXT:    sw a0, 36(sp)
 ; RV64I-NEXT:    sd zero, 16(sp)
 ; RV64I-NEXT:    sd zero, 8(sp)
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    call callee129@plt
-; RV64I-NEXT:    lw a0, 20(sp)
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    lw a0, 36(sp)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = alloca i32
   store i32 42, i32* %1
@@ -55,7 +54,6 @@
   ret i32 %2
 }
 
-; FIXME: Stack write clobbers the spilled value (on RV64).
 define i32 @caller160() nounwind {
 ; RV32I-LABEL: caller160:
 ; RV32I:       # %bb.0:
@@ -77,18 +75,18 @@
 ;
 ; RV64I-LABEL: caller160:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a0, zero, 42
-; RV64I-NEXT:    sw a0, 20(sp)
+; RV64I-NEXT:    sw a0, 36(sp)
 ; RV64I-NEXT:    sd zero, 16(sp)
 ; RV64I-NEXT:    sd zero, 8(sp)
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    call callee160@plt
-; RV64I-NEXT:    lw a0, 20(sp)
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    lw a0, 36(sp)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = alloca i32
   store i32 42, i32* %1
diff --git a/llvm/test/CodeGen/RISCV/vector-abi.ll b/llvm/test/CodeGen/RISCV/vector-abi.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/vector-abi.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=riscv32 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV64
+
+; FIXME: The stack location used to pass the parameter to the function has the
+; incorrect size and alignment for how we use it, and we clobber the stack.
+
+declare void @callee(<4 x i8> %v)
+
+define void @caller() {
+  ; RV32-LABEL: name: caller
+  ; RV32: stack:
+  ; RV32: - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 4,
+  ; RV32-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+  ; RV32-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  ; RV32: bb.0 (%ir-block.0):
+  ; RV32:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+  ; RV32:   [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 7
+  ; RV32:   SW killed [[ADDI]], %stack.0, 12 :: (store 4 into %stack.0)
+  ; RV32:   [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 6
+  ; RV32:   SW killed [[ADDI1]], %stack.0, 8 :: (store 4 into %stack.0)
+  ; RV32:   [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 5
+  ; RV32:   SW killed [[ADDI2]], %stack.0, 4 :: (store 4 into %stack.0)
+  ; RV32:   [[ADDI3:%[0-9]+]]:gpr = ADDI $x0, 4
+  ; RV32:   SW killed [[ADDI3]], %stack.0, 0 :: (store 4 into %stack.0)
+  ; RV32:   [[ADDI4:%[0-9]+]]:gpr = ADDI %stack.0, 0
+  ; RV32:   $x10 = COPY [[ADDI4]]
+  ; RV32:   PseudoCALL target-flags(riscv-plt) @callee, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2
+  ; RV32:   ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+  ; RV32:   PseudoRET
+  ; RV64-LABEL: name: caller
+  ; RV64: stack:
+  ; RV64: - { id: 0, name: '', type: default, offset: 0, size: 32, alignment: 8,
+  ; RV64-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+  ; RV64-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  ; RV64: bb.0 (%ir-block.0):
+  ; RV64:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2
+  ; RV64:   [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 7
+  ; RV64:   SD killed [[ADDI]], %stack.0, 24 :: (store 8 into %stack.0)
+  ; RV64:   [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 6
+  ; RV64:   SD killed [[ADDI1]], %stack.0, 16 :: (store 8 into %stack.0)
+  ; RV64:   [[ADDI2:%[0-9]+]]:gpr = ADDI $x0, 5
+  ; RV64:   SD killed [[ADDI2]], %stack.0, 8 :: (store 8 into %stack.0)
+  ; RV64:   [[ADDI3:%[0-9]+]]:gpr = ADDI $x0, 4
+  ; RV64:   SD killed [[ADDI3]], %stack.0, 0 :: (store 8 into %stack.0)
+  ; RV64:   [[ADDI4:%[0-9]+]]:gpr = ADDI %stack.0, 0
+  ; RV64:   $x10 = COPY [[ADDI4]]
+  ; RV64:   PseudoCALL target-flags(riscv-plt) @callee, csr_ilp32_lp64, implicit-def dead $x1, implicit $x10, implicit-def $x2
+  ; RV64:   ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2
+  ; RV64:   PseudoRET
+  call void @callee(<4 x i8> <i8 4, i8 5, i8 6, i8 7>)
+  ret void
+}
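
A note on the mechanics of the fix: the old code sized the indirect-argument spill slot with CreateStackTemporary(Outs[i].ArgVT) before the loop that spills the remaining parts of a split argument, so the temporary reflected the original argument type rather than what the stores actually write. An i129, for example, occupies 17 bytes as a type but is legalized to three i64 parts whose spills write 24 bytes, and the overflow clobbered the adjacent stack object (the FIXMEs removed above). The patch therefore walks the parts first and only then creates a single temporary from the accumulated store size and the maximum part alignment. The standalone C++ sketch below models just that accounting; Part, StackSlot, and createSpillSlotForParts are illustrative names invented for the sketch, not LLVM APIs.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for one register-sized piece of a split
    // argument; the real patch works on SDValue/EVT inside LowerCall.
    struct Part {
      uint64_t StoreSize; // bytes this part writes when spilled
      uint64_t Align;     // alignment required by the part's type
    };

    struct StackSlot {
      uint64_t Size;
      uint64_t Align;
    };

    // Mirrors the patched order of operations: visit every part first,
    // accumulating the total store size and the maximum alignment, and only
    // then create the temporary. Sizing the slot from the argument type
    // alone (the old behaviour) can under-allocate, because the legalized
    // parts together store more bytes than the type itself occupies.
    static StackSlot createSpillSlotForParts(const std::vector<Part> &Parts) {
      uint64_t StoredSize = 0;
      uint64_t StackAlign = 1;
      for (const Part &P : Parts) {
        StoredSize += P.StoreSize;
        StackAlign = std::max(StackAlign, P.Align);
      }
      return {StoredSize, StackAlign};
    }

    int main() {
      // caller129 from stack-slot-size.ll on RV64: i129 is legalized to
      // three i64 parts, so 24 bytes are stored even though the i129 type
      // itself occupies only 17 bytes.
      std::vector<Part> I129Parts = {{8, 8}, {8, 8}, {8, 8}};
      StackSlot Slot = createSpillSlotForParts(I129Parts);
      std::printf("size=%llu align=%llu\n", (unsigned long long)Slot.Size,
                  (unsigned long long)Slot.Align);
      return 0;
    }

Compiled as C++11 or later, this prints size=24 align=8 for the i129 case, matching the three sd-zero stores (24 bytes) that caller129 now makes into the enlarged frame.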