Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1547,6 +1547,7 @@ bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(MF.getDataLayout()); + LLVMContext &Ctx = *DAG.getContext(); // Detect unsupported vector argument and return types. if (Subtarget.hasVector()) { @@ -1556,7 +1557,7 @@ // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; - SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected @@ -1581,7 +1582,17 @@ if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. - SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT); + Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; + EVT SlotVT = getValueType(MF.getDataLayout(), OrigArgType); + if (SlotVT.getSizeInBits() > 64 && + getTypeAction(Ctx, SlotVT) == TypePromoteInteger) { + // Allocate the full stack space for a promoted (and split) argument. 
+ MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, SlotVT); + unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, SlotVT); + SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); + } + SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); + int FI = cast(SpillSlot)->getIndex(); MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, SpillSlot, @@ -1598,6 +1609,8 @@ MemOpChains.push_back( DAG.getStore(Chain, DL, PartValue, Address, MachinePointerInfo::getFixedStack(MF, FI))); + assert((PartOffset + PartValue.getValueType().getStoreSize() <= + SlotVT.getStoreSize()) && "Not enough space for argument part!"); ++I; } ArgValue = SpillSlot; @@ -1691,7 +1704,7 @@ // Assign locations to each value returned by this call. SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); // Copy all of the result registers out of their specified physreg. Index: llvm/test/CodeGen/SystemZ/args-11.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/args-11.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; Test outgoing promoted arguments that are split (and passed by reference). +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; The i96 arg is promoted to i128 and should get the full stack space. 
+declare void @fn1(i96) ; external callee taking one i96 argument
+define i32 @fn2() { ; caller: keeps an i32 local live across the call to @fn1
+; CHECK-LABEL: fn2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -184
+; CHECK-NEXT: .cfi_def_cfa_offset 344
+; CHECK-NEXT: mvhi 180(%r15), -1
+; CHECK-NEXT: mvghi 168(%r15), 0
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: mvghi 160(%r15), 0
+; CHECK-NEXT: brasl %r14, fn1@PLT
+; CHECK-NEXT: l %r2, 180(%r15)
+; CHECK-NEXT: lmg %r14, %r15, 296(%r15)
+; CHECK-NEXT: br %r14
+ %1 = alloca i32 ; i32 local; expected at 180(%r15) per the mvhi/l CHECK lines above
+ store i32 -1, i32* %1 ; write -1 into the local before the call
+ call void @fn1(i96 0) ; expected: zero stored at 160 and 168(%r15), address 160(%r15) passed in %r2
+ %2 = load i32, i32* %1 ; reload the local after the call
+ ret i32 %2 ; return the reloaded value (presumably to keep the local live across the call; confirm)
+}
+
+declare void @fn3(i136) ; external callee taking one i136 argument
+define i32 @fn4() { ; same shape as @fn2, but with an i136 argument
+; CHECK-LABEL: fn4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
+; CHECK-NEXT: .cfi_offset %r14, -48
+; CHECK-NEXT: .cfi_offset %r15, -40
+; CHECK-NEXT: aghi %r15, -192
+; CHECK-NEXT: .cfi_def_cfa_offset 352
+; CHECK-NEXT: mvhi 188(%r15), -1
+; CHECK-NEXT: mvghi 176(%r15), 0
+; CHECK-NEXT: mvghi 168(%r15), 0
+; CHECK-NEXT: la %r2, 160(%r15)
+; CHECK-NEXT: mvghi 160(%r15), 0
+; CHECK-NEXT: brasl %r14, fn3@PLT
+; CHECK-NEXT: l %r2, 188(%r15)
+; CHECK-NEXT: lmg %r14, %r15, 304(%r15)
+; CHECK-NEXT: br %r14
+ %1 = alloca i32 ; i32 local; expected at 188(%r15) per the mvhi/l CHECK lines above
+ store i32 -1, i32* %1 ; write -1 into the local before the call
+ call void @fn3(i136 0) ; expected: zero stored at 160, 168 and 176(%r15), address 160(%r15) passed in %r2
+ %2 = load i32, i32* %1 ; reload the local after the call
+ ret i32 %2 ; return the reloaded value (presumably to keep the local live across the call; confirm)
+}