Index: llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp +++ llvm/trunk/lib/Target/Mips/MipsISelLowering.cpp @@ -2949,12 +2949,44 @@ CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), MipsCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget)); + const ExternalSymbolSDNode *ES = + dyn_cast_or_null<const ExternalSymbolSDNode>(Callee.getNode()); + + // There is one case where CALLSEQ_START..CALLSEQ_END can be nested, which + // is during the lowering of a call with a byval argument which produces + // a call to memcpy. For the O32 case, this causes the caller to allocate + // stack space for the reserved argument area for the callee, then recursively + // again for the memcpy call. In the NEWABI case, this doesn't occur as those + // ABIs mandate that the callee allocates the reserved argument area. We do + // still produce nested CALLSEQ_START..CALLSEQ_END with zero space though. + // + // If the callee has a byval argument and memcpy is used, we are mandated + // to already have produced a reserved argument area for the callee for O32. + // Therefore, the reserved argument area can be reused for both calls. + // + // Other cases of calling memcpy cannot have a chain with a CALLSEQ_START + // present, as we have yet to hook that node onto the chain. + // + // Hence, the CALLSEQ_START and CALLSEQ_END nodes can be eliminated in this + // case. GCC does a similar trick, in that wherever possible, it calculates + // the maximum outgoing argument area (including the reserved area), and + // preallocates the stack space on entrance to the caller. + // + // FIXME: We should do the same for efficiency and space. + + // Note: The check on the calling convention below must match + // MipsABIInfo::GetCalleeAllocdArgSizeInBytes(). 
+ bool MemcpyInByVal = ES && + StringRef(ES->getSymbol()) == StringRef("memcpy") && + CallConv != CallingConv::Fast && + Chain.getOpcode() == ISD::CALLSEQ_START; + // Allocate the reserved argument area. It seems strange to do this from the // caller side but removing it breaks the frame size calculation. - CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); + unsigned ReservedArgArea = + MemcpyInByVal ? 0 : ABI.GetCalleeAllocdArgSizeInBytes(CallConv); + CCInfo.AllocateStack(ReservedArgArea, 1); - const ExternalSymbolSDNode *ES = - dyn_cast_or_null<const ExternalSymbolSDNode>(Callee.getNode()); CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), ES ? ES->getSymbol() : nullptr); @@ -2989,7 +3021,7 @@ NextStackOffset = alignTo(NextStackOffset, StackAlignment); SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true); - if (!IsTailCall) + if (!(IsTailCall || MemcpyInByVal)) Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL); SDValue StackPtr = @@ -3197,10 +3229,13 @@ Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, Ops); SDValue InFlag = Chain.getValue(1); - // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, - DAG.getIntPtrConstant(0, DL, true), InFlag, DL); - InFlag = Chain.getValue(1); + // Create the CALLSEQ_END node in the case where it is not a call to + // memcpy. + if (!(MemcpyInByVal)) { + Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, + DAG.getIntPtrConstant(0, DL, true), InFlag, DL); + InFlag = Chain.getValue(1); + } // Handle result values, copying them out of physregs into vregs that we // return. Index: llvm/trunk/test/CodeGen/Mips/cconv/byval.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/cconv/byval.ll +++ llvm/trunk/test/CodeGen/Mips/cconv/byval.ll @@ -0,0 +1,430 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: The SelectionDAG checks have been added by hand. 
+ +; RUN: llc < %s -mtriple=mips-linux-gnu -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=O32 +; RUN: llc < %s -mtriple=mips64-linux-gnu -target-abi n32 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=N32 +; RUN: llc < %s -mtriple=mips64-linux-gnu -target-abi n64 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefix=N64 + +; RUN: llc < %s -mtriple=mips-linux-gnu -verify-machineinstrs -debug 2>&1 \ +; RUN: | FileCheck %s --check-prefix=O32-SDAG +; RUN: llc < %s -mtriple=mips64-linux-gnu -target-abi n32 -verify-machineinstrs \ +; RUN: -debug 2>&1 | FileCheck %s --check-prefix=N32-SDAG +; RUN: llc < %s -mtriple=mips64-linux-gnu -target-abi n64 -verify-machineinstrs \ +; RUN: -debug 2>&1 | FileCheck %s --check-prefix=N64-SDAG + +; REQUIRES: asserts + +; Test that reserved argument area is shared between the memcpy call and the +; call to f2. This eliminates the nested call sequence nodes. + +; Also, test that a basic call to memcpy reserves its outgoing argument area. + +; FIXME: We should also be explicit about testing that the loads for the +; arguments are scheduled after the memcpy, but that wasn't enforced in +; this patch. 
+ +%struct.S1 = type { [65520 x i8] } + +; O32-SDAG-LABEL: Initial selection DAG: %bb.0 'g:entry' +; O32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; O32-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy' +; O32-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> +; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i32 +; O32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> + +; N32-SDAG-LABEL: Initial selection DAG: %bb.0 'g:entry' +; N32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; N32-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy' +; N32-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> +; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i32 +; N32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> + +; N64-SDAG-LABEL: Initial selection DAG: %bb.0 'g:entry' +; N64-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}> +; N64-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}> +; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i64'memcpy' +; N64-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}> +; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i64 +; N64-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}> + +define dso_local void @g() #0 { +; O32-LABEL: g: +; O32: # %bb.0: # %entry +; O32-NEXT: lui $1, 1 +; O32-NEXT: subu $sp, $sp, $1 +; O32-NEXT: .cfi_def_cfa_offset 65536 +; O32-NEXT: lui $1, 1 +; O32-NEXT: addu $1, $sp, $1 +; O32-NEXT: sw $ra, -4($1) # 4-byte 
Folded Spill +; O32-NEXT: .cfi_offset 31, -4 +; O32-NEXT: ori $1, $zero, 65520 +; O32-NEXT: subu $sp, $sp, $1 +; O32-NEXT: addiu $1, $sp, 8 +; O32-NEXT: addiu $5, $1, 16 +; O32-NEXT: addiu $4, $sp, 16 +; O32-NEXT: jal memcpy +; O32-NEXT: ori $6, $zero, 65504 +; O32-NEXT: lw $7, 20($sp) +; O32-NEXT: lw $6, 16($sp) +; O32-NEXT: lw $5, 12($sp) +; O32-NEXT: jal f2 +; O32-NEXT: lw $4, 8($sp) +; O32-NEXT: ori $1, $zero, 65520 +; O32-NEXT: addu $sp, $sp, $1 +; O32-NEXT: lui $1, 1 +; O32-NEXT: addu $1, $sp, $1 +; O32-NEXT: lw $ra, -4($1) # 4-byte Folded Reload +; O32-NEXT: lui $1, 1 +; O32-NEXT: jr $ra +; O32-NEXT: addu $sp, $sp, $1 +; +; N32-LABEL: g: +; N32: # %bb.0: # %entry +; N32-NEXT: lui $1, 1 +; N32-NEXT: subu $sp, $sp, $1 +; N32-NEXT: .cfi_def_cfa_offset 65536 +; N32-NEXT: lui $1, 1 +; N32-NEXT: addu $1, $sp, $1 +; N32-NEXT: sd $ra, -8($1) # 8-byte Folded Spill +; N32-NEXT: .cfi_offset 31, -8 +; N32-NEXT: ori $1, $zero, 65456 +; N32-NEXT: subu $sp, $sp, $1 +; N32-NEXT: addiu $1, $sp, 8 +; N32-NEXT: addiu $5, $1, 64 +; N32-NEXT: ori $6, $zero, 65456 +; N32-NEXT: jal memcpy +; N32-NEXT: move $4, $sp +; N32-NEXT: ld $11, 64($sp) +; N32-NEXT: ld $10, 56($sp) +; N32-NEXT: ld $9, 48($sp) +; N32-NEXT: ld $8, 40($sp) +; N32-NEXT: ld $7, 32($sp) +; N32-NEXT: ld $6, 24($sp) +; N32-NEXT: ld $5, 16($sp) +; N32-NEXT: jal f2 +; N32-NEXT: ld $4, 8($sp) +; N32-NEXT: ori $1, $zero, 65456 +; N32-NEXT: addu $sp, $sp, $1 +; N32-NEXT: lui $1, 1 +; N32-NEXT: addu $1, $sp, $1 +; N32-NEXT: ld $ra, -8($1) # 8-byte Folded Reload +; N32-NEXT: lui $1, 1 +; N32-NEXT: jr $ra +; N32-NEXT: addu $sp, $sp, $1 +; +; N64-LABEL: g: +; N64: # %bb.0: # %entry +; N64-NEXT: lui $1, 1 +; N64-NEXT: dsubu $sp, $sp, $1 +; N64-NEXT: .cfi_def_cfa_offset 65536 +; N64-NEXT: lui $1, 1 +; N64-NEXT: daddu $1, $sp, $1 +; N64-NEXT: sd $ra, -8($1) # 8-byte Folded Spill +; N64-NEXT: .cfi_offset 31, -8 +; N64-NEXT: ori $1, $zero, 65456 +; N64-NEXT: dsubu $sp, $sp, $1 +; N64-NEXT: daddiu $1, $sp, 8 +; N64-NEXT: daddiu 
$5, $1, 64 +; N64-NEXT: ori $6, $zero, 65456 +; N64-NEXT: jal memcpy +; N64-NEXT: move $4, $sp +; N64-NEXT: ld $11, 64($sp) +; N64-NEXT: ld $10, 56($sp) +; N64-NEXT: ld $9, 48($sp) +; N64-NEXT: ld $8, 40($sp) +; N64-NEXT: ld $7, 32($sp) +; N64-NEXT: ld $6, 24($sp) +; N64-NEXT: ld $5, 16($sp) +; N64-NEXT: jal f2 +; N64-NEXT: ld $4, 8($sp) +; N64-NEXT: ori $1, $zero, 65456 +; N64-NEXT: daddu $sp, $sp, $1 +; N64-NEXT: lui $1, 1 +; N64-NEXT: daddu $1, $sp, $1 +; N64-NEXT: ld $ra, -8($1) # 8-byte Folded Reload +; N64-NEXT: lui $1, 1 +; N64-NEXT: jr $ra +; N64-NEXT: daddu $sp, $sp, $1 +entry: + %a = alloca %struct.S1, align 4 + call void @f2(%struct.S1* byval align 4 %a) + ret void +} + +declare dso_local void @f2(%struct.S1* byval align 4) #1 + +; O32-SDAG-LABEL: Initial selection DAG: %bb.0 'g2:entry' +; O32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy' +; O32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> +; O32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; O32-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy' +; O32-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> +; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i32 +; O32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> + +; N32-SDAG-LABEL: Initial selection DAG: %bb.0 'g2:entry' +; N32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy' +; N32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> +; N32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; N32-SDAG-NOT: t{{.*}}: ch,glue = 
callseq_start t{{.*}}, TargetConstant:i32<{{.*}}> +; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy' +; N32-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> +; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i32 +; N32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<{{.*}}> + +; N64-SDAG-LABEL: Initial selection DAG: %bb.0 'g2:entry' +; N64-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}> +; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i64'memcpy' +; N64-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}> +; N64-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}> +; N64-SDAG-NOT: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<{{.*}}> +; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i64'memcpy' +; N64-SDAG-NOT: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}> +; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetGlobalAddress:i64 +; N64-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<{{.*}}> + +define dso_local void @g2(%struct.S1* %a) { +; O32-LABEL: g2: +; O32: # %bb.0: # %entry +; O32-NEXT: lui $1, 1 +; O32-NEXT: addiu $1, $1, 8 +; O32-NEXT: subu $sp, $sp, $1 +; O32-NEXT: .cfi_def_cfa_offset 65544 +; O32-NEXT: lui $1, 1 +; O32-NEXT: addu $1, $sp, $1 +; O32-NEXT: sw $ra, 4($1) # 4-byte Folded Spill +; O32-NEXT: lui $1, 1 +; O32-NEXT: addu $1, $sp, $1 +; O32-NEXT: sw $16, 0($1) # 4-byte Folded Spill +; O32-NEXT: .cfi_offset 31, -4 +; O32-NEXT: .cfi_offset 16, -8 +; O32-NEXT: move $5, $4 +; O32-NEXT: lui $1, 1 +; O32-NEXT: addu $1, $sp, $1 +; O32-NEXT: sw $4, -4($1) +; O32-NEXT: addiu $sp, $sp, -16 +; O32-NEXT: addiu $16, $sp, 8 +; O32-NEXT: ori $6, $zero, 65520 +; O32-NEXT: jal memcpy +; O32-NEXT: move $4, $16 +; O32-NEXT: addiu $sp, $sp, 16 +; O32-NEXT: ori $1, $zero, 
65520 +; O32-NEXT: subu $sp, $sp, $1 +; O32-NEXT: addiu $5, $16, 16 +; O32-NEXT: addiu $4, $sp, 16 +; O32-NEXT: jal memcpy +; O32-NEXT: ori $6, $zero, 65504 +; O32-NEXT: lw $7, 20($sp) +; O32-NEXT: lw $6, 16($sp) +; O32-NEXT: lw $5, 12($sp) +; O32-NEXT: jal f2 +; O32-NEXT: lw $4, 8($sp) +; O32-NEXT: ori $1, $zero, 65520 +; O32-NEXT: addu $sp, $sp, $1 +; O32-NEXT: lui $1, 1 +; O32-NEXT: addu $1, $sp, $1 +; O32-NEXT: lw $16, 0($1) # 4-byte Folded Reload +; O32-NEXT: lui $1, 1 +; O32-NEXT: addu $1, $sp, $1 +; O32-NEXT: lw $ra, 4($1) # 4-byte Folded Reload +; O32-NEXT: lui $1, 1 +; O32-NEXT: addiu $1, $1, 8 +; O32-NEXT: jr $ra +; O32-NEXT: addu $sp, $sp, $1 +; +; N32-LABEL: g2: +; N32: # %bb.0: # %entry +; N32-NEXT: lui $1, 1 +; N32-NEXT: addiu $1, $1, 16 +; N32-NEXT: subu $sp, $sp, $1 +; N32-NEXT: .cfi_def_cfa_offset 65552 +; N32-NEXT: lui $1, 1 +; N32-NEXT: addu $1, $sp, $1 +; N32-NEXT: sd $ra, 8($1) # 8-byte Folded Spill +; N32-NEXT: lui $1, 1 +; N32-NEXT: addu $1, $sp, $1 +; N32-NEXT: sd $16, 0($1) # 8-byte Folded Spill +; N32-NEXT: .cfi_offset 31, -8 +; N32-NEXT: .cfi_offset 16, -16 +; N32-NEXT: move $5, $4 +; N32-NEXT: sll $1, $5, 0 +; N32-NEXT: lui $2, 1 +; N32-NEXT: addu $2, $sp, $2 +; N32-NEXT: sw $1, -4($2) +; N32-NEXT: addiu $16, $sp, 8 +; N32-NEXT: ori $6, $zero, 65520 +; N32-NEXT: jal memcpy +; N32-NEXT: move $4, $16 +; N32-NEXT: addiu $5, $16, 64 +; N32-NEXT: ori $1, $zero, 65456 +; N32-NEXT: subu $sp, $sp, $1 +; N32-NEXT: ori $6, $zero, 65456 +; N32-NEXT: jal memcpy +; N32-NEXT: move $4, $sp +; N32-NEXT: ld $11, 64($sp) +; N32-NEXT: ld $10, 56($sp) +; N32-NEXT: ld $9, 48($sp) +; N32-NEXT: ld $8, 40($sp) +; N32-NEXT: ld $7, 32($sp) +; N32-NEXT: ld $6, 24($sp) +; N32-NEXT: ld $5, 16($sp) +; N32-NEXT: jal f2 +; N32-NEXT: ld $4, 8($sp) +; N32-NEXT: ori $1, $zero, 65456 +; N32-NEXT: addu $sp, $sp, $1 +; N32-NEXT: lui $1, 1 +; N32-NEXT: addu $1, $sp, $1 +; N32-NEXT: ld $16, 0($1) # 8-byte Folded Reload +; N32-NEXT: lui $1, 1 +; N32-NEXT: addu $1, $sp, $1 +; 
N32-NEXT: ld $ra, 8($1) # 8-byte Folded Reload +; N32-NEXT: lui $1, 1 +; N32-NEXT: addiu $1, $1, 16 +; N32-NEXT: jr $ra +; N32-NEXT: addu $sp, $sp, $1 +; +; N64-LABEL: g2: +; N64: # %bb.0: # %entry +; N64-NEXT: lui $1, 1 +; N64-NEXT: daddiu $1, $1, 16 +; N64-NEXT: dsubu $sp, $sp, $1 +; N64-NEXT: .cfi_def_cfa_offset 65552 +; N64-NEXT: lui $1, 1 +; N64-NEXT: daddu $1, $sp, $1 +; N64-NEXT: sd $ra, 8($1) # 8-byte Folded Spill +; N64-NEXT: lui $1, 1 +; N64-NEXT: daddu $1, $sp, $1 +; N64-NEXT: sd $16, 0($1) # 8-byte Folded Spill +; N64-NEXT: .cfi_offset 31, -8 +; N64-NEXT: .cfi_offset 16, -16 +; N64-NEXT: move $5, $4 +; N64-NEXT: lui $1, 1 +; N64-NEXT: daddu $1, $sp, $1 +; N64-NEXT: sd $4, -8($1) +; N64-NEXT: daddiu $16, $sp, 8 +; N64-NEXT: ori $6, $zero, 65520 +; N64-NEXT: jal memcpy +; N64-NEXT: move $4, $16 +; N64-NEXT: ori $1, $zero, 65456 +; N64-NEXT: dsubu $sp, $sp, $1 +; N64-NEXT: daddiu $5, $16, 64 +; N64-NEXT: ori $6, $zero, 65456 +; N64-NEXT: jal memcpy +; N64-NEXT: move $4, $sp +; N64-NEXT: ld $11, 64($sp) +; N64-NEXT: ld $10, 56($sp) +; N64-NEXT: ld $9, 48($sp) +; N64-NEXT: ld $8, 40($sp) +; N64-NEXT: ld $7, 32($sp) +; N64-NEXT: ld $6, 24($sp) +; N64-NEXT: ld $5, 16($sp) +; N64-NEXT: jal f2 +; N64-NEXT: ld $4, 8($sp) +; N64-NEXT: ori $1, $zero, 65456 +; N64-NEXT: daddu $sp, $sp, $1 +; N64-NEXT: lui $1, 1 +; N64-NEXT: daddu $1, $sp, $1 +; N64-NEXT: ld $16, 0($1) # 8-byte Folded Reload +; N64-NEXT: lui $1, 1 +; N64-NEXT: daddu $1, $sp, $1 +; N64-NEXT: ld $ra, 8($1) # 8-byte Folded Reload +; N64-NEXT: lui $1, 1 +; N64-NEXT: daddiu $1, $1, 16 +; N64-NEXT: jr $ra +; N64-NEXT: daddu $sp, $sp, $1 +entry: + %a.addr = alloca %struct.S1*, align 4 + %byval-temp = alloca %struct.S1, align 4 + store %struct.S1* %a, %struct.S1** %a.addr, align 4 + %0 = load %struct.S1*, %struct.S1** %a.addr, align 4 + %1 = bitcast %struct.S1* %byval-temp to i8* + %2 = bitcast %struct.S1* %0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 1 %2, i32 65520, i1 false) + 
call void @f2(%struct.S1* byval align 4 %byval-temp) + ret void +} + +; O32-SDAG-LABEL: Initial selection DAG: %bb.0 'g3:entry' +; O32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<16> +; O32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy' +; O32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<16> + +; N32-SDAG-LABEL: Initial selection DAG: %bb.0 'g3:entry' +; N32-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i32<0> +; N32-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i32'memcpy' +; N32-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i32<0> + +; N64-SDAG-LABEL: Initial selection DAG: %bb.0 'g3:entry' +; N64-SDAG: t{{.*}}: ch,glue = callseq_start t{{.*}}, TargetConstant:i64<0> +; N64-SDAG: t{{.*}}: ch,glue = MipsISD::JmpLink t{{.*}}, TargetExternalSymbol:i64'memcpy' +; N64-SDAG: t{{.*}}: ch,glue = callseq_end t{{.*}}, TargetConstant:i64<0> + +define dso_local i32 @g3(%struct.S1* %a, %struct.S1* %b) #0 { +; O32-LABEL: g3: +; O32: # %bb.0: # %entry +; O32-NEXT: addiu $sp, $sp, -32 +; O32-NEXT: .cfi_def_cfa_offset 32 +; O32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill +; O32-NEXT: .cfi_offset 31, -4 +; O32-NEXT: sw $5, 20($sp) +; O32-NEXT: sw $4, 24($sp) +; O32-NEXT: jal memcpy +; O32-NEXT: ori $6, $zero, 65520 +; O32-NEXT: addiu $2, $zero, 4 +; O32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload +; O32-NEXT: jr $ra +; O32-NEXT: addiu $sp, $sp, 32 +; +; N32-LABEL: g3: +; N32: # %bb.0: # %entry +; N32-NEXT: addiu $sp, $sp, -16 +; N32-NEXT: .cfi_def_cfa_offset 16 +; N32-NEXT: sd $ra, 8($sp) # 8-byte Folded Spill +; N32-NEXT: .cfi_offset 31, -8 +; N32-NEXT: sll $1, $5, 0 +; N32-NEXT: sw $1, 0($sp) +; N32-NEXT: sll $1, $4, 0 +; N32-NEXT: sw $1, 4($sp) +; N32-NEXT: jal memcpy +; N32-NEXT: ori $6, $zero, 65520 +; N32-NEXT: addiu $2, $zero, 4 +; N32-NEXT: ld $ra, 8($sp) # 8-byte Folded Reload +; N32-NEXT: jr $ra +; N32-NEXT: addiu $sp, $sp, 16 +; +; 
N64-LABEL: g3: +; N64: # %bb.0: # %entry +; N64-NEXT: daddiu $sp, $sp, -32 +; N64-NEXT: .cfi_def_cfa_offset 32 +; N64-NEXT: sd $ra, 24($sp) # 8-byte Folded Spill +; N64-NEXT: .cfi_offset 31, -8 +; N64-NEXT: sd $5, 8($sp) +; N64-NEXT: sd $4, 16($sp) +; N64-NEXT: jal memcpy +; N64-NEXT: ori $6, $zero, 65520 +; N64-NEXT: addiu $2, $zero, 4 +; N64-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload +; N64-NEXT: jr $ra +; N64-NEXT: daddiu $sp, $sp, 32 +entry: + %a.addr = alloca %struct.S1*, align 4 + %b.addr = alloca %struct.S1*, align 4 + store %struct.S1* %a, %struct.S1** %a.addr, align 4 + store %struct.S1* %b, %struct.S1** %b.addr, align 4 + %0 = load %struct.S1*, %struct.S1** %a.addr, align 4 + %1 = bitcast %struct.S1* %0 to i8* + %2 = load %struct.S1*, %struct.S1** %b.addr, align 4 + %3 = bitcast %struct.S1* %2 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %1, i8* align 1 %3, i32 65520, i1 false) + ret i32 4 +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #2