Index: lib/Target/X86/X86CallLowering.cpp
===================================================================
--- lib/Target/X86/X86CallLowering.cpp
+++ lib/Target/X86/X86CallLowering.cpp
@@ -84,7 +84,7 @@
                        MachineInstrBuilder &MIB, CCAssignFn *AssignFn)
       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), StackSize(0),
         DL(MIRBuilder.getMF().getDataLayout()),
-        STI(MIRBuilder.getMF().getSubtarget<X86Subtarget>()) {}
+        STI(MIRBuilder.getMF().getSubtarget<X86Subtarget>()), NumXMMRegs(0) {}
 
   unsigned getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
@@ -125,21 +125,27 @@
                  CCValAssign::LocInfo LocInfo,
                  const CallLowering::ArgInfo &Info, CCState &State) override {
-    if (!Info.IsFixed)
-      return true; // TODO: handle variadic function
-
     bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
     StackSize = State.getNextStackOffset();
+
+    static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2,
+                                           X86::XMM3, X86::XMM4, X86::XMM5,
+                                           X86::XMM6, X86::XMM7};
+    if (!Info.IsFixed)
+      NumXMMRegs = State.getFirstUnallocated(XMMArgRegs);
+
     return Res;
   }
 
   uint64_t getStackSize() { return StackSize; }
+  uint64_t getNumXmmRegs() { return NumXMMRegs; }
 
 protected:
   MachineInstrBuilder &MIB;
   uint64_t StackSize;
   const DataLayout &DL;
   const X86Subtarget &STI;
+  unsigned NumXMMRegs;
 };
 
 } // End anonymous namespace.
@@ -321,12 +327,15 @@
                            TRI->getCallPreservedMask(MF, CallConv));
 
   SmallVector<ArgInfo, 8> SplitArgs;
+  bool IsFixed = true;
   for (const auto &OrigArg : OrigArgs) {
     // TODO: handle not simple cases.
     if (OrigArg.Flags.isByVal())
       return false;
 
+    IsFixed = OrigArg.IsFixed;
+
     if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
                            [&](ArrayRef<unsigned> Regs) {
                              MIRBuilder.buildUnmerge(Regs, OrigArg.Reg);
@@ -338,6 +347,21 @@
   if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
     return false;
 
+  if (STI.is64Bit() && !IsFixed && !STI.isCallingConvWin64(CallConv)) {
+    // From AMD64 ABI document:
+    // For calls that may call functions that use varargs or stdargs
+    // (prototype-less calls or calls to functions containing ellipsis (...) in
+    // the declaration) %al is used as hidden argument to specify the number
+    // of SSE registers used. The contents of %al do not need to match exactly
+    // the number of registers, but must be an upper bound on the number of SSE
+    // registers used and is in the range 0 - 8 inclusive.
+
+    MIRBuilder.buildInstr(X86::MOV8ri)
+        .addDef(X86::AL)
+        .addImm(Handler.getNumXmmRegs());
+    MIB.addUse(X86::AL, RegState::Implicit);
+  }
+
   // Now we can add the actual call instruction to the correct basic block.
   MIRBuilder.insertInstr(MIB);
Index: test/CodeGen/X86/GlobalISel/callingconv.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/callingconv.ll
+++ test/CodeGen/X86/GlobalISel/callingconv.ll
@@ -356,3 +356,77 @@
   call void @take_char(i8 zeroext %val)
   ret void
 }
+
+declare void @variadic_callee(i8*, ...)
+define void @test_variadic_call_1(i8** %addr_ptr, i32* %val_ptr) {
+; X32-LABEL: test_variadic_call_1:
+; X32:       # BB#0:
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:  .Lcfi10:
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    movl 16(%esp), %eax
+; X32-NEXT:    movl 20(%esp), %ecx
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    movl (%ecx), %ecx
+; X32-NEXT:    movl %eax, (%esp)
+; X32-NEXT:    movl %ecx, 4(%esp)
+; X32-NEXT:    calll variadic_callee
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_variadic_call_1:
+; X64:       # BB#0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:  .Lcfi8:
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    movq (%rdi), %rdi
+; X64-NEXT:    movl (%rsi), %esi
+; X64-NEXT:    movb $0, %al
+; X64-NEXT:    callq variadic_callee
+; X64-NEXT:    popq %rax
+; X64-NEXT:    retq
+
+  %addr = load i8*, i8** %addr_ptr
+  %val = load i32, i32* %val_ptr
+  call void (i8*, ...) @variadic_callee(i8* %addr, i32 %val)
+  ret void
+}
+
+define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) {
+; X32-LABEL: test_variadic_call_2:
+; X32:       # BB#0:
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:  .Lcfi11:
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    movl 16(%esp), %eax
+; X32-NEXT:    movl 20(%esp), %ecx
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    movl (%ecx), %edx
+; X32-NEXT:    movl 4(%ecx), %ecx
+; X32-NEXT:    movl %eax, (%esp)
+; X32-NEXT:    movl $4, %eax
+; X32-NEXT:    leal (%esp,%eax), %eax
+; X32-NEXT:    movl %edx, 4(%esp)
+; X32-NEXT:    movl %ecx, 4(%eax)
+; X32-NEXT:    calll variadic_callee
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_variadic_call_2:
+; X64:       # BB#0:
+; X64-NEXT:    pushq %rax
+; X64-NEXT:  .Lcfi9:
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    movq (%rdi), %rdi
+; X64-NEXT:    movq (%rsi), %rcx
+; X64-NEXT:    movb $1, %al
+; X64-NEXT:    movq %rcx, %xmm0
+; X64-NEXT:    callq variadic_callee
+; X64-NEXT:    popq %rax
+; X64-NEXT:    retq
+
+  %addr = load i8*, i8** %addr_ptr
+  %val = load double, double* %val_ptr
+  call void (i8*, ...) @variadic_callee(i8* %addr, double %val)
+  ret void
+}
Index: test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
===================================================================
--- test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -697,3 +697,92 @@
   ret void
 }
 
+declare void @variadic_callee(i8*, ...)
+define void @test_variadic_call_1(i8** %addr_ptr, i32* %val_ptr) {
+; ALL-LABEL: name: test_variadic_call_1
+
+; X32: fixedStack:
+; X32-NEXT: - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: 0,
+; X32-NEXT:     isImmutable: true, isAliased: false, callee-saved-register: '' }
+; X32-NEXT: - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: 0,
+; X32-NEXT:     isImmutable: true, isAliased: false, callee-saved-register: '' }
+; X32: %2(p0) = G_FRAME_INDEX %fixed-stack.1
+; X32-NEXT: %0(p0) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 0)
+; X32-NEXT: %3(p0) = G_FRAME_INDEX %fixed-stack.0
+; X32-NEXT: %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+; X32-NEXT: %4(p0) = G_LOAD %0(p0) :: (load 4 from %ir.addr_ptr)
+; X32-NEXT: %5(s32) = G_LOAD %1(p0) :: (load 4 from %ir.val_ptr)
+; X32-NEXT: ADJCALLSTACKDOWN32 8, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp
+; X32-NEXT: %6(p0) = COPY %esp
+; X32-NEXT: %7(s32) = G_CONSTANT i32 0
+; X32-NEXT: %8(p0) = G_GEP %6, %7(s32)
+; X32-NEXT: G_STORE %4(p0), %8(p0) :: (store 4 into stack, align 0)
+; X32-NEXT: %9(p0) = COPY %esp
+; X32-NEXT: %10(s32) = G_CONSTANT i32 4
+; X32-NEXT: %11(p0) = G_GEP %9, %10(s32)
+; X32-NEXT: G_STORE %5(s32), %11(p0) :: (store 4 into stack + 4, align 0)
+; X32-NEXT: CALLpcrel32 @variadic_callee, csr_32, implicit %esp
+; X32-NEXT: ADJCALLSTACKUP32 8, 0, implicit-def %esp, implicit-def %eflags, implicit %esp
+; X32-NEXT: RET 0
+
+; X64: %0(p0) = COPY %rdi
+; X64-NEXT: %1(p0) = COPY %rsi
+; X64-NEXT: %2(p0) = G_LOAD %0(p0) :: (load 8 from %ir.addr_ptr)
+; X64-NEXT: %3(s32) = G_LOAD %1(p0) :: (load 4 from %ir.val_ptr)
+; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp
+; X64-NEXT: %rdi = COPY %2(p0)
+; X64-NEXT: %esi = COPY %3(s32)
+; X64-NEXT: %al = MOV8ri 0
+; X64-NEXT: CALL64pcrel32 @variadic_callee, csr_64, implicit %rsp, implicit %rdi, implicit %esi, implicit %al
+; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp
+; X64-NEXT: RET 0
+
+  %addr = load i8*, i8** %addr_ptr
+  %val = load i32, i32* %val_ptr
+  call void (i8*, ...) @variadic_callee(i8* %addr, i32 %val)
+  ret void
+}
+
+define void @test_variadic_call_2(i8** %addr_ptr, double* %val_ptr) {
+; ALL-LABEL: name: test_variadic_call_2
+
+; X32: fixedStack:
+; X32-NEXT: - { id: 0, type: default, offset: 4, size: 4, alignment: 4, stack-id: 0,
+; X32-NEXT:     isImmutable: true, isAliased: false, callee-saved-register: '' }
+; X32-NEXT: - { id: 1, type: default, offset: 0, size: 4, alignment: 16, stack-id: 0,
+; X32-NEXT:     isImmutable: true, isAliased: false, callee-saved-register: '' }
+; X32: %2(p0) = G_FRAME_INDEX %fixed-stack.1
+; X32-NEXT: %0(p0) = G_LOAD %2(p0) :: (invariant load 4 from %fixed-stack.1, align 0)
+; X32-NEXT: %3(p0) = G_FRAME_INDEX %fixed-stack.0
+; X32-NEXT: %1(p0) = G_LOAD %3(p0) :: (invariant load 4 from %fixed-stack.0, align 0)
+; X32-NEXT: %4(p0) = G_LOAD %0(p0) :: (load 4 from %ir.addr_ptr)
+; X32-NEXT: %5(s64) = G_LOAD %1(p0) :: (load 8 from %ir.val_ptr, align 4)
+; X32-NEXT: ADJCALLSTACKDOWN32 12, 0, 0, implicit-def %esp, implicit-def %eflags, implicit %esp
+; X32-NEXT: %6(p0) = COPY %esp
+; X32-NEXT: %7(s32) = G_CONSTANT i32 0
+; X32-NEXT: %8(p0) = G_GEP %6, %7(s32)
+; X32-NEXT: G_STORE %4(p0), %8(p0) :: (store 4 into stack, align 0)
+; X32-NEXT: %9(p0) = COPY %esp
+; X32-NEXT: %10(s32) = G_CONSTANT i32 4
+; X32-NEXT: %11(p0) = G_GEP %9, %10(s32)
+; X32-NEXT: G_STORE %5(s64), %11(p0) :: (store 8 into stack + 4, align 0)
+; X32-NEXT: CALLpcrel32 @variadic_callee, csr_32, implicit %esp
+; X32-NEXT: ADJCALLSTACKUP32 12, 0, implicit-def %esp, implicit-def %eflags, implicit %esp
+; X32-NEXT: RET 0
+
+; X64: %1(p0) = COPY %rsi
+; X64-NEXT: %2(p0) = G_LOAD %0(p0) :: (load 8 from %ir.addr_ptr)
+; X64-NEXT: %3(s64) = G_LOAD %1(p0) :: (load 8 from %ir.val_ptr)
+; X64-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp
+; X64-NEXT: %rdi = COPY %2(p0)
+; X64-NEXT: %xmm0 = COPY %3(s64)
+; X64-NEXT: %al = MOV8ri 1
+; X64-NEXT: CALL64pcrel32 @variadic_callee, csr_64, implicit %rsp, implicit %rdi, implicit %xmm0, implicit %al
+; X64-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def %rsp, implicit-def %eflags, implicit %rsp
+; X64-NEXT: RET 0
+
+  %addr = load i8*, i8** %addr_ptr
+  %val = load double, double* %val_ptr
+  call void (i8*, ...) @variadic_callee(i8* %addr, double %val)
+  ret void
+}