Index: include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- include/llvm/CodeGen/SelectionDAG.h
+++ include/llvm/CodeGen/SelectionDAG.h
@@ -755,6 +755,17 @@
   /// the target's desired shift amount type.
   SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
 
+  /// \brief Expand the given \c ISD::VAARG node as the Legalize pass would.
+  ///
+  /// Returns the load of the argument value; result 1 of the returned value
+  /// is the output chain.
+  SDValue expandVAArg(SDNode *Node);
+
+  /// \brief Expand the given \c ISD::VACOPY node as the Legalize pass would.
+  ///
+  /// Returns the store of the copied va_list pointer, i.e. the output chain.
+  SDValue expandVACopy(SDNode *Node);
+
   /// UpdateNodeOperands - *Mutate* the specified node in-place to have the
   /// specified operands. If the resultant node already exists in the DAG,
   /// this does not modify the specified node, instead it returns the node that
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2933,57 +2933,13 @@
     Results.push_back(Tmp1);
     break;
   }
-  case ISD::VAARG: {
-    const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
-    EVT VT = Node->getValueType(0);
-    Tmp1 = Node->getOperand(0);
-    Tmp2 = Node->getOperand(1);
-    unsigned Align = Node->getConstantOperandVal(3);
-
-    SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
-                                     MachinePointerInfo(V),
-                                     false, false, false, 0);
-    SDValue VAList = VAListLoad;
-
-    if (Align > TLI.getMinStackArgumentAlignment()) {
-      assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
-
-      VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
-                           DAG.getConstant(Align - 1,
-                                           VAList.getValueType()));
-
-      VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList,
-                           DAG.getConstant(-(int64_t)Align,
-                                           VAList.getValueType()));
-    }
-
-    // Increment the pointer, VAList, to the next vaarg
-    Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
-                       DAG.getConstant(TLI.getDataLayout()->
-                        getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
-                                       VAList.getValueType()));
-    // Store the incremented VAList to the legalized pointer
-    Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
-                        MachinePointerInfo(V), false, false, 0);
-    // Load the actual argument out of the pointer VAList
-    Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
-                                  false, false, false, 0));
+  case ISD::VAARG:
+    Results.push_back(DAG.expandVAArg(Node));
     Results.push_back(Results[0].getValue(1));
     break;
-  }
-  case ISD::VACOPY: {
-    // This defaults to loading a pointer from the input and storing it to the
-    // output, returning the chain.
-    const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
-    const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
-    Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
-                       Node->getOperand(2), MachinePointerInfo(VS),
-                       false, false, false, 0);
-    Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
-                        MachinePointerInfo(VD), false, false, 0);
-    Results.push_back(Tmp1);
+  case ISD::VACOPY:
+    Results.push_back(DAG.expandVACopy(Node));
     break;
-  }
   case ISD::EXTRACT_VECTOR_ELT:
     if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
       // This must be an access of the only element. Return it.
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1519,6 +1519,63 @@
   return getNode(Opcode, SDLoc(Op), ShTy, Op);
 }
 
+/// Expand \p Node, an ISD::VAARG node, into a load of the va_list pointer, an
+/// optional re-alignment, a store of the advanced pointer, and a load of the
+/// argument itself. Operands: chain (0), va_list pointer (1), source value
+/// (2), alignment (3). Result 1 of the returned load is the output chain.
+SDValue SelectionDAG::expandVAArg(SDNode *Node) {
+  SDLoc dl(Node);
+  const TargetLowering &TLI = getTargetLoweringInfo();
+  const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+  EVT VT = Node->getValueType(0);
+  SDValue Tmp1 = Node->getOperand(0);
+  SDValue Tmp2 = Node->getOperand(1);
+  unsigned Align = Node->getConstantOperandVal(3);
+
+  SDValue VAListLoad = getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+                               MachinePointerInfo(V), false, false, false, 0);
+  SDValue VAList = VAListLoad;
+
+  if (Align > TLI.getMinStackArgumentAlignment()) {
+    assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+    VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+                     getConstant(Align - 1, VAList.getValueType()));
+
+    VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+                     getConstant(-(int64_t)Align, VAList.getValueType()));
+  }
+
+  // Increment the pointer, VAList, to the next vaarg
+  Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
+                 getConstant(TLI.getDataLayout()->
+                   getTypeAllocSize(VT.getTypeForEVT(*getContext())),
+                   VAList.getValueType()));
+  // Store the incremented VAList to the legalized pointer
+  Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2,
+                  MachinePointerInfo(V), false, false, 0);
+  // Load the actual argument out of the pointer VAList
+  return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(),
+                 false, false, false, 0);
+}
+
+/// Expand \p Node, an ISD::VACOPY node, assuming the va_list is a single
+/// pointer: load it from the source (operand 2) and store it to the
+/// destination (operand 1). Returns the store, i.e. the output chain.
+SDValue SelectionDAG::expandVACopy(SDNode *Node) {
+  SDLoc dl(Node);
+  const TargetLowering &TLI = getTargetLoweringInfo();
+  // This defaults to loading a pointer from the input and storing it to the
+  // output, returning the chain.
+  const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
+  const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
+  SDValue Tmp1 = getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
+                         Node->getOperand(2), MachinePointerInfo(VS),
+                         false, false, false, 0);
+  return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+                  MachinePointerInfo(VD), false, false, 0);
+}
+
 /// CreateStackTemporary - Create a stack temporary, suitable for holding the
 /// specified value type.
 SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -10801,7 +10801,8 @@
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   SDLoc DL(Op);
 
-  if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
+  if (!Subtarget->is64Bit() || Subtarget->isTargetWin64() ||
+      MF.getFunction()->getCallingConv() == CallingConv::X86_64_Win64) {
     // vastart just stores the address of the VarArgsFrameIndex slot into the
     // memory location argument.
     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
@@ -10862,12 +10863,17 @@
           Subtarget->isTargetDarwin()) &&
           "Unhandled target in LowerVAARG");
   assert(Op.getNode()->getNumOperands() == 4);
+
   SDValue Chain = Op.getOperand(0);
   SDValue SrcPtr = Op.getOperand(1);
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   unsigned Align = Op.getConstantOperandVal(3);
   SDLoc dl(Op);
 
+  if (SV->getType() == Type::getInt8PtrTy(SV->getContext())->getPointerTo())
+    // The Win64 ABI uses char* instead of a structure.
+    return DAG.expandVAArg(Op.getNode());
+
   EVT ArgVT = Op.getNode()->getValueType(0);
   Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
   uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
@@ -10934,6 +10940,14 @@
   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
   SDLoc DL(Op);
 
+  if (const BitCastInst *DstCast = dyn_cast<BitCastInst>(DstSV))
+    if (const BitCastInst *SrcCast = dyn_cast<BitCastInst>(SrcSV))
+      if (DstCast->getSrcTy() ==
+          Type::getInt8PtrTy(DstCast->getContext())->getPointerTo() &&
+          SrcCast->getSrcTy() ==
+          Type::getInt8PtrTy(SrcCast->getContext())->getPointerTo())
+        // Both lists are bitcast from i8**: probably a Win64 (char*) va_copy.
+        return DAG.expandVACopy(Op.getNode());
   return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
                        DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
                        false,
Index: lib/Transforms/Instrumentation/MemorySanitizer.cpp
===================================================================
--- lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1921,6 +1921,9 @@
   }
 
   void visitVAStartInst(VAStartInst &I) {
+    // Win64-convention functions: va_start is left uninstrumented.
+    if (F.getCallingConv() == CallingConv::X86_64_Win64)
+      return;
     IRBuilder<> IRB(&I);
     VAStartInstrumentationList.push_back(&I);
     Value *VAListTag = I.getArgOperand(0);
@@ -1933,9 +1936,14 @@
   }
 
   void visitVACopyInst(VACopyInst &I) {
+    Value *VAList = I.getArgOperand(0);
+    // Skip va_lists bitcast from i8** (presumably Win64-style char* lists).
+    if (BitCastInst *Cast = dyn_cast<BitCastInst>(VAList))
+      if (Cast->getSrcTy() ==
+          Type::getInt8PtrTy(Cast->getContext())->getPointerTo())
+        return;
     IRBuilder<> IRB(&I);
-    Value *VAListTag = I.getArgOperand(0);
-    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    Value *ShadowPtr = MSV.getShadowPtr(VAList, IRB.getInt8Ty(), IRB);
 
     // Unpoison the whole __va_list_tag.
     // FIXME: magic ABI constants.
Index: test/CodeGen/X86/x86-64-ms_abi-vararg.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/x86-64-ms_abi-vararg.ll
@@ -0,0 +1,145 @@
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-linux-gnu | FileCheck %s
+
+; Verify that the var arg parameters which are passed in registers are stored
+; in home stack slots allocated by the caller and that AP is correctly
+; calculated.
+define x86_64_win64cc void @average_va(i32 %count, ...) nounwind {
+entry:
+; CHECK: movq %r9, 32(%rsp)
+; CHECK: movq %r8, 24(%rsp)
+; CHECK: movq %rdx, 16(%rsp)
+; CHECK: leaq 16(%rsp), %rax
+
+  %ap = alloca i8*, align 8 ; <i8**> [#uses=1]
+  %ap1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
+  call void @llvm.va_start(i8* %ap1)
+  ret void
+}
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_copy(i8*, i8*) nounwind
+declare void @llvm.va_end(i8*) nounwind
+
+; Verify that AP moves with the number of named arguments: f5, f4 and f3
+; expect va_start to take the address 8 * (named args + 1) bytes above %rsp.
+; CHECK-LABEL: f5:
+; CHECK: leaq 48(%rsp),
+define x86_64_win64cc i8* @f5(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  ret i8* %ap1
+}
+
+; CHECK-LABEL: f4:
+; CHECK: leaq 40(%rsp),
+define x86_64_win64cc i8* @f4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  ret i8* %ap1
+}
+
+; CHECK-LABEL: f3:
+; CHECK: leaq 32(%rsp),
+define x86_64_win64cc i8* @f3(i64 %a0, i64 %a1, i64 %a2, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  ret i8* %ap1
+}
+
+; WinX86_64 uses char* for va_list. Verify that the correct amount of bytes
+; are copied using va_copy.
+
+; CHECK-LABEL: copy1:
+; CHECK: leaq 16(%rsp), [[REG_copy1:%[a-z0-9]+]]
+; CHECK: movq [[REG_copy1]], -8(%rsp)
+; CHECK: movq [[REG_copy1]], -16(%rsp)
+; CHECK: ret
+define x86_64_win64cc void @copy1(i64 %a0, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %cp = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  %cp1 = bitcast i8** %cp to i8*
+  call void @llvm.va_start(i8* %ap1)
+  call void @llvm.va_copy(i8* %cp1, i8* %ap1)
+  ret void
+}
+
+; CHECK-LABEL: copy4:
+; CHECK: leaq 40(%rsp), [[REG_copy4:%[a-z0-9]+]]
+; CHECK: movq [[REG_copy4]], -8(%rsp)
+; CHECK: movq [[REG_copy4]], -16(%rsp)
+; CHECK: ret
+define x86_64_win64cc void @copy4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %cp = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  %cp1 = bitcast i8** %cp to i8*
+  call void @llvm.va_start(i8* %ap1)
+  call void @llvm.va_copy(i8* %cp1, i8* %ap1)
+  ret void
+}
+
+; CHECK-LABEL: arg4:
+; va_start:
+; CHECK: leaq 40(%rsp), [[REG_arg4_1:%[a-z0-9]+]]
+; CHECK: movq [[REG_arg4_1]], -8(%rsp)
+; va_arg:
+; CHECK: leaq 44(%rsp), [[REG_arg4_2:%[a-z0-9]+]]
+; CHECK: movq [[REG_arg4_2]], -8(%rsp)
+; CHECK: movl 40(%rsp), %eax
+; CHECK: ret
+define x86_64_win64cc i32 @arg4(i64 %a0, i64 %a1, i64 %a2, i64 %a3, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %tmp = va_arg i8** %ap, i32
+  ret i32 %tmp
+}
+
+; Check that if an ms_va_list gets passed from an ms_abi function to a sysv_abi
+; function, the ms_va_list still works properly.
+; CHECK-LABEL: win64_func:
+; CHECK: pushq %{{[a-z0-9]+}}
+; CHECK: leaq 24(%rsp), %rdi
+; CHECK: movq %rdi, (%rsp)
+; CHECK: callq sysv_func
+; CHECK: ret
+define x86_64_win64cc i32 @win64_func(i64 %a, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = load i8** %ap
+  %1 = call i32 @sysv_func(i8* %0)
+  call void @llvm.va_end(i8* %ap1)
+  ret i32 %1
+}
+
+; CHECK-LABEL: sysv_func:
+; CHECK: movq %rdi, -8(%rsp)
+; CHECK: movq %rdi, -16(%rsp)
+; CHECK: leaq 4(%rdi), [[REG_sysv_f:%[a-z0-9]+]]
+; CHECK: movq [[REG_sysv_f]], -16(%rsp)
+; CHECK: movl (%rdi), %eax
+; CHECK: ret
+define x86_64_sysvcc i32 @sysv_func(i8* %ap) nounwind {
+entry:
+  %0 = alloca i8*, align 8
+  store i8* %ap, i8** %0, align 8
+  %cp = alloca i8*, align 8
+  %ap1 = bitcast i8** %0 to i8*
+  %cp1 = bitcast i8** %cp to i8*
+  call void @llvm.va_copy(i8* %cp1, i8* %ap1)
+  %tmp = va_arg i8** %cp, i32
+  call void @llvm.va_end(i8* %cp1)
+  ret i32 %tmp
+}