diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1507,11 +1507,11 @@
                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                              const SDLoc &dl, SelectionDAG &DAG,
                              const CCValAssign &VA, MachineFrameInfo &MFI,
-                             unsigned i) const;
+                             unsigned i, bool IsVarArgWin32) const;
     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                              const SDLoc &dl, SelectionDAG &DAG,
-                             const CCValAssign &VA,
-                             ISD::ArgFlagsTy Flags, bool isByval) const;
+                             const CCValAssign &VA, ISD::ArgFlagsTy Flags,
+                             bool isByval, bool IsVarArgWin32) const;
 
     // Call lowering helpers.
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3457,12 +3457,11 @@
   return true;
 }
 
-SDValue
-X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
-                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    const SDLoc &dl, SelectionDAG &DAG,
-                                    const CCValAssign &VA,
-                                    MachineFrameInfo &MFI, unsigned i) const {
+SDValue X86TargetLowering::LowerMemArgument(
+    SDValue Chain, CallingConv::ID CallConv,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+    SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo &MFI, unsigned i,
+    bool IsVarArgWin32) const {
   // Create the nodes corresponding to a load from this parameter slot.
   ISD::ArgFlagsTy Flags = Ins[i].Flags;
   bool AlwaysUseMutable = shouldGuaranteeTCO(
@@ -3563,10 +3562,14 @@
       MFI.setObjectSExt(FI, true);
   }
 
+  MaybeAlign Alignment;
+  if (IsVarArgWin32 && ValVT.isVector())
+    Alignment = MaybeAlign(4);
   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
   SDValue Val = DAG.getLoad(
       ValVT, dl, Chain, FIN,
-      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+      Alignment);
   return ExtendedInMem
              ? (VA.getValVT().isVector()
                     ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
@@ -3964,8 +3967,10 @@
       }
     } else {
       assert(VA.isMemLoc());
-      ArgValue =
-          LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
+      bool IsVarArgWin32 =
+          IsVarArg && Subtarget.isTargetWindowsMSVC() && !Is64Bit;
+      ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI,
+                                  InsIndex, IsVarArgWin32);
     }
 
     // If value is passed via pointer - do a load.
@@ -4076,8 +4081,8 @@
                                             SDValue Arg, const SDLoc &dl,
                                             SelectionDAG &DAG,
                                             const CCValAssign &VA,
-                                            ISD::ArgFlagsTy Flags,
-                                            bool isByVal) const {
+                                            ISD::ArgFlagsTy Flags, bool isByVal,
+                                            bool IsVarArgWin32) const {
   unsigned LocMemOffset = VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
@@ -4085,9 +4090,13 @@
   if (isByVal)
     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
 
+  MaybeAlign Alignment;
+  if (IsVarArgWin32 && Arg.getSimpleValueType().isVector())
+    Alignment = MaybeAlign(4);
   return DAG.getStore(
       Chain, dl, Arg, PtrOff,
-      MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
+      MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
+      Alignment);
 }
 
 /// Emit a load of return address if tail call
@@ -4391,8 +4400,10 @@
       if (!StackPtr.getNode())
         StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
                                       getPointerTy(DAG.getDataLayout()));
-      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
-                                             dl, DAG, VA, Flags, isByVal));
+      bool IsVarArgWin32 =
+          isVarArg && Subtarget.isTargetWindowsMSVC() && !Is64Bit;
+      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA,
+                                             Flags, isByVal, IsVarArgWin32));
     }
   }
 
diff --git a/llvm/test/CodeGen/X86/vaargs-win32.ll b/llvm/test/CodeGen/X86/vaargs-win32.ll
--- a/llvm/test/CodeGen/X86/vaargs-win32.ll
+++ b/llvm/test/CodeGen/X86/vaargs-win32.ll
@@ -38,7 +38,7 @@
 ; MSVC-LABEL: foo:
 ; MSVC:       # %bb.0:
 ; MSVC-NEXT:    pushl %eax
-; MSVC-NEXT:    movaps 8(%esp), %xmm0
+; MSVC-NEXT:    movups 8(%esp), %xmm0
 ; MSVC-NEXT:    movups 24(%esp), %xmm1
 ; MSVC-NEXT:    cmpltps %xmm1, %xmm0
 ; MSVC-NEXT:    popl %eax
@@ -73,9 +73,9 @@
 ; MSVC:       # %bb.0:
 ; MSVC-NEXT:    subl $32, %esp
 ; MSVC-NEXT:    movaps {{.*#+}} xmm0 = [5.0E+0,6.0E+0,7.0E+0,8.0E+0]
-; MSVC-NEXT:    movaps %xmm0, 16(%esp)
+; MSVC-NEXT:    movups %xmm0, 16(%esp)
 ; MSVC-NEXT:    movaps {{.*#+}} xmm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
-; MSVC-NEXT:    movaps %xmm0, (%esp)
+; MSVC-NEXT:    movups %xmm0, (%esp)
 ; MSVC-NEXT:    calll _foo
 ; MSVC-NEXT:    addl $32, %esp
 ; MSVC-NEXT:    retl
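
Context for the change: on i686-pc-windows-msvc the incoming stack is only guaranteed to be 4-byte aligned, so a vararg slot holding a 128-bit vector cannot be assumed to be 16-byte aligned. Attaching an explicit MaybeAlign(4) to the corresponding load/store nodes makes isel pick movups instead of movaps, as the updated CHECK lines show. The in-tree test (llvm/test/CodeGen/X86/vaargs-win32.ll) is written directly in LLVM IR; the C below is only a rough sketch of the same scenario, assuming a clang that accepts __m128 variadic arguments for this target, and "caller" is a made-up name:

// Rough C illustration only; assumes clang targeting i686-pc-windows-msvc
// accepts __m128 arguments passed through "...".
#include <stdarg.h>
#include <xmmintrin.h>

__m128 foo(int n, ...) {
  va_list ap;
  va_start(ap, n);
  __m128 a = va_arg(ap, __m128); // slot may be only 4-byte aligned on Win32
  __m128 b = va_arg(ap, __m128);
  va_end(ap);
  return _mm_cmplt_ps(a, b);     // the cmpltps seen in the MSVC check lines
}

void caller(void) {
  foo(2, _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f),
         _mm_setr_ps(5.0f, 6.0f, 7.0f, 8.0f));
}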