diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -6170,20 +6170,13 @@ ArgOffset += PtrByteSize; continue; } - // Copy entire object into memory. There are cases where gcc-generated - // code assumes it is there, even if it could be put entirely into - // registers. (This is not what the doc says.) - - // FIXME: The above statement is likely due to a misunderstanding of the - // documents. All arguments must be copied into the parameter area BY - // THE CALLEE in the event that the callee takes the address of any - // formal argument. That has not yet been implemented. However, it is - // reasonable to use the stack area as a staging area for the register - // load. - - // Skip this for small aggregates, as we will use the same slot for a - // right-justified copy, below. - if (Size >= 8) + // Copy the object to the parameter save area if it cannot be passed + // entirely in registers. + // FIXME: We only need to copy the parts that must be passed in the + // parameter save area. For the parts passed in registers, we don't need + // to copy them to the stack, although we still need to allocate space + // for them in the parameter save area. + if ((NumGPRs - GPR_idx) * PtrByteSize < Size) Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, CallSeqStart, Flags, DAG, dl); diff --git a/llvm/test/CodeGen/PowerPC/byval.ll b/llvm/test/CodeGen/PowerPC/byval.ll --- a/llvm/test/CodeGen/PowerPC/byval.ll +++ b/llvm/test/CodeGen/PowerPC/byval.ll @@ -9,45 +9,28 @@ declare dso_local i32 @foo1(%struct* byval(%struct) %var) declare dso_local void @foo(%struct* %var) -; FIXME: for the byval parameter %x, now the memory for local variable and -; for parameter save area are overlap. 
-; For the below case, -; the local variable space is r1 + 40 ~ r1 + 76 -; the parameter save area is r1 + 32 ~ r1 + 68 +; Check that the 36-byte byval parameter is passed entirely in registers. define dso_local i32 @bar() { ; CHECK-LABEL: bar: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr 0 -; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: std 0, 16(1) -; CHECK-NEXT: stdu 1, -96(1) -; CHECK-NEXT: addi 30, 1, 40 -; CHECK-NEXT: mr 3, 30 +; CHECK-NEXT: stdu 1, -80(1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi 3, 1, 40 ; CHECK-NEXT: bl foo ; CHECK-NEXT: nop -; CHECK-NEXT: li 3, 16 -; CHECK-NEXT: lxvd2x 0, 30, 3 -; CHECK-NEXT: li 3, 48 -; CHECK-NEXT: stxvd2x 0, 1, 3 -; CHECK-NEXT: li 3, 32 -; CHECK-NEXT: lxvd2x 0, 0, 30 -; CHECK-NEXT: stxvd2x 0, 1, 3 -; CHECK-NEXT: lwz 3, 72(1) ; CHECK-NEXT: ld 7, 72(1) +; CHECK-NEXT: ld 6, 64(1) ; CHECK-NEXT: ld 5, 56(1) ; CHECK-NEXT: ld 4, 48(1) -; CHECK-NEXT: stw 3, 64(1) ; CHECK-NEXT: ld 3, 40(1) -; CHECK-NEXT: ld 6, 64(1) ; CHECK-NEXT: bl foo1 ; CHECK-NEXT: nop -; CHECK-NEXT: addi 1, 1, 96 +; CHECK-NEXT: addi 1, 1, 80 ; CHECK-NEXT: ld 0, 16(1) -; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr 0 ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll b/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll --- a/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll +++ b/llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll @@ -340,8 +340,6 @@ ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: addis 3, 2, .LC4@toc@ha ; CHECK-NEXT: ld 3, .LC4@toc@l(3) -; CHECK-NEXT: ld 4, 0(3) -; CHECK-NEXT: std 4, 32(1) ; CHECK-NEXT: ld 3, 0(3) ; CHECK-NEXT: bl test_byval_mem8 ; CHECK-NEXT: nop @@ -379,14 +377,7 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: addis 3, 2, .LC5@toc@ha -; CHECK-NEXT: li 4, 16 ; CHECK-NEXT: ld 3, 
.LC5@toc@l(3) -; CHECK-NEXT: lxvd2x 0, 3, 4 -; CHECK-NEXT: li 4, 48 -; CHECK-NEXT: stxvd2x 0, 1, 4 -; CHECK-NEXT: li 4, 32 -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: stxvd2x 0, 1, 4 ; CHECK-NEXT: ld 6, 24(3) ; CHECK-NEXT: ld 5, 16(3) ; CHECK-NEXT: ld 4, 8(3) @@ -427,16 +418,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: addis 3, 2, .LC5@toc@ha -; CHECK-NEXT: li 4, 16 ; CHECK-NEXT: addis 8, 2, .LCPI20_0@toc@ha ; CHECK-NEXT: ld 3, .LC5@toc@l(3) ; CHECK-NEXT: lfs 1, .LCPI20_0@toc@l(8) -; CHECK-NEXT: lxvd2x 0, 3, 4 -; CHECK-NEXT: li 4, 56 -; CHECK-NEXT: stxvd2x 0, 1, 4 -; CHECK-NEXT: li 4, 40 -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: stxvd2x 0, 1, 4 ; CHECK-NEXT: ld 7, 24(3) ; CHECK-NEXT: ld 6, 16(3) ; CHECK-NEXT: ld 5, 8(3) @@ -531,20 +515,7 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: addis 3, 2, .LC6@toc@ha -; CHECK-NEXT: li 4, 48 -; CHECK-NEXT: li 5, 80 -; CHECK-NEXT: li 6, 64 ; CHECK-NEXT: ld 3, .LC6@toc@l(3) -; CHECK-NEXT: lxvd2x 0, 3, 4 -; CHECK-NEXT: stxvd2x 0, 1, 5 -; CHECK-NEXT: li 5, 32 -; CHECK-NEXT: lxvd2x 0, 3, 5 -; CHECK-NEXT: stxvd2x 0, 1, 6 -; CHECK-NEXT: li 6, 16 -; CHECK-NEXT: lxvd2x 0, 3, 6 -; CHECK-NEXT: stxvd2x 0, 1, 4 -; CHECK-NEXT: lxvd2x 0, 0, 3 -; CHECK-NEXT: stxvd2x 0, 1, 5 ; CHECK-NEXT: ld 10, 56(3) ; CHECK-NEXT: ld 9, 48(3) ; CHECK-NEXT: ld 8, 40(3) diff --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-align.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-align.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-byval-align.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-align.ll @@ -47,10 +47,11 @@ ret void } ; CHECK-LABEL: @caller2 -; CHECK: std 3, [[OFF:[0-9]+]](1) -; CHECK: addi [[REG1:[0-9]+]], 1, [[OFF]] -; CHECK: lxvw4x [[REG2:[0-9]+]], 0, [[REG1]] -; CHECK: li [[REG3:[0-9]+]], 128 -; CHECK: stxvw4x 0, 1, [[REG3]] -; CHECK: bl test2 +; CHECK-DAG: std 3, [[OFF:[0-9]+]](1) +; CHECK-DAG: addi [[REG1:[0-9]+]], 1, [[OFF]] +; +; CHECK-DAG: lxvw4x [[REG2:[0-9]+]], 0, 
[[REG1]] +; CHECK-DAG: li [[REG3:[0-9]+]], 128 +; CHECK: stxvw4x 0, 1, [[REG3]] +; CHECK: bl test2