diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6090,12 +6090,13 @@
   bool IsVarArg = CLI.IsVarArg;
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   MVT XLenVT = Subtarget.getXLenVT();
+  LLVMContext &Ctx = *DAG.getContext();

   MachineFunction &MF = DAG.getMachineFunction();

   // Analyze the operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);

   if (CallConv == CallingConv::Fast)
     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
@@ -6188,7 +6189,26 @@
     // For now, only handle fully promoted and indirect arguments.
     if (VA.getLocInfo() == CCValAssign::Indirect) {
       // Store the argument in a stack slot and pass its address.
-      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
+      unsigned ArgIndex = Outs[i].OrigArgIndex;
+      EVT SlotVT;
+      Align SlotAlign = DAG.getEVTAlign(ArgValue.getValueType());
+      SlotAlign = std::max(DAG.getEVTAlign(Outs[i].ArgVT), SlotAlign);
+      if (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
+        // Allocate the full stack space for a promoted (and split) argument.
+        Type *OrigArgType = CLI.Args[Outs[i].OrigArgIndex].Ty;
+        EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
+        MVT PartVT =
+            getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
+        unsigned N =
+            getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
+        SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
+        Type *SlotTy = SlotVT.getTypeForEVT(*DAG.getContext());
+        SlotAlign =
+            std::max(SlotAlign, DAG.getDataLayout().getPrefTypeAlign(SlotTy));
+      } else {
+        SlotVT = Outs[i].ArgVT;
+      }
+      SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT, SlotAlign.value());
       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
       MemOpChains.push_back(
           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
@@ -6198,7 +6218,6 @@
       // Vectors may be partly split to registers and partly to the stack, in
       // which case the base address is partly offset and subsequent stores are
      // relative to that.
-      unsigned ArgIndex = Outs[i].OrigArgIndex;
       unsigned ArgPartOffset = Outs[i].PartOffset;
       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
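The hunk above sizes the indirect spill slot from the full promoted width (PartVT.getSizeInBits() * N) rather than from Outs[i].ArgVT alone. As a sanity check, here is a minimal standalone sketch of that arithmetic for RV64 (XLen == 64). The helper names are hypothetical stand-ins, not LLVM API calls, and the sketch assumes the old CreateStackTemporary(Outs[i].ArgVT) reserved only the store size of the original integer type:

#include <iostream>

// XLen on RV64: indirect arguments are split into 64-bit parts.
constexpr unsigned PartBits = 64;

// Bytes the old code reserved: the store size of Outs[i].ArgVT
// (an assumption of this sketch, matching CreateStackTemporary(EVT)).
constexpr unsigned oldSlotBytes(unsigned ArgBits) { return (ArgBits + 7) / 8; }

// Bytes the PartOffset store loop actually writes: one full part per
// register, with N standing in for getNumRegistersForCallingConv.
constexpr unsigned storedBytes(unsigned ArgBits) {
  unsigned N = (ArgBits + PartBits - 1) / PartBits; // round up to whole parts
  return N * (PartBits / 8);
}

int main() {
  for (unsigned Bits : {129u, 160u, 161u})
    std::cout << "i" << Bits << ": old slot " << oldSlotBytes(Bits)
              << " bytes, stores write " << storedBytes(Bits) << " bytes\n";
  // i129: old slot 17 bytes, stores write 24 bytes
  // i160: old slot 20 bytes, stores write 24 bytes
  // i161: old slot 21 bytes, stores write 24 bytes
}

For all three widths exercised by stack-slot-size.ll the split produces three i64 parts, so the patched code picks SlotVT = i192 and CreateStackTemporary reserves the full 24 bytes that the store loop writes.
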
diff --git a/llvm/test/CodeGen/RISCV/stack-slot-size.ll b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
--- a/llvm/test/CodeGen/RISCV/stack-slot-size.ll
+++ b/llvm/test/CodeGen/RISCV/stack-slot-size.ll
@@ -13,7 +13,6 @@
 declare void @callee160(i160)
 declare void @callee161(i161)

-; FIXME: Stack write clobbers the spilled value (on RV64).
 define i32 @caller129() nounwind {
 ; RV32I-LABEL: caller129:
 ; RV32I:       # %bb.0:
@@ -35,18 +34,18 @@
 ;
 ; RV64I-LABEL: caller129:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a0, zero, 42
-; RV64I-NEXT:    sw a0, 20(sp)
+; RV64I-NEXT:    sw a0, 36(sp)
 ; RV64I-NEXT:    sd zero, 16(sp)
 ; RV64I-NEXT:    sd zero, 8(sp)
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    call callee129@plt
-; RV64I-NEXT:    lw a0, 20(sp)
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    lw a0, 36(sp)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = alloca i32
   store i32 42, i32* %1
@@ -55,7 +54,6 @@
   ret i32 %2
 }

-; FIXME: Stack write clobbers the spilled value (on RV64).
 define i32 @caller160() nounwind {
 ; RV32I-LABEL: caller160:
 ; RV32I:       # %bb.0:
@@ -77,18 +75,18 @@
 ;
 ; RV64I-LABEL: caller160:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -32
-; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a0, zero, 42
-; RV64I-NEXT:    sw a0, 20(sp)
+; RV64I-NEXT:    sw a0, 36(sp)
 ; RV64I-NEXT:    sd zero, 16(sp)
 ; RV64I-NEXT:    sd zero, 8(sp)
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    sd zero, 0(sp)
 ; RV64I-NEXT:    call callee160@plt
-; RV64I-NEXT:    lw a0, 20(sp)
-; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    lw a0, 36(sp)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = alloca i32
   store i32 42, i32* %1
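The updated RV64 check lines make the effect of the fix concrete. Before the patch, caller129's frame was 32 bytes: the local i32 holding 42 lived at 20(sp), while the argument's three parts were stored at 0(sp), 8(sp), and 16(sp). The sd zero, 16(sp) store covered bytes 16 through 23 and so clobbered the value at 20(sp) that the later lw a0, 20(sp) reloaded; that is the clobber the removed FIXME comments recorded. After the patch the frame is 48 bytes, the 24-byte spill slot occupies 0(sp) through 23(sp), and the local moves to 36(sp), out of reach of the argument stores. caller160 changes identically.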