diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -762,6 +762,7 @@
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
                                   SDValue &Hi);
+  void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
 
   // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
   bool SplitVectorOperand(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -848,6 +848,9 @@
   case ISD::VECTOR_SHUFFLE:
     SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
     break;
+  case ISD::VAARG:
+    SplitVecRes_VAARG(N, Lo, Hi);
+    break;
 
   case ISD::ANY_EXTEND_VECTOR_INREG:
   case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -1840,6 +1843,32 @@
   }
 }
 
+/// Split the vector result of a VAARG node into two VAARG nodes that each
+/// produce half of the original vector's elements.
+///
+/// Lo is fetched first and Hi second, with Hi chained on Lo's output chain.
+/// Vector elements are laid out in index order in memory regardless of byte
+/// order, so the halves must not be swapped on big-endian targets.
+void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  EVT OVT = N->getValueType(0);
+  EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext());
+  SDValue Chain = N->getOperand(0);
+  SDValue Ptr = N->getOperand(1);
+  SDValue SV = N->getOperand(2);
+  SDLoc dl(N);
+  const unsigned Align = N->getConstantOperandVal(3);
+
+  // Only the first read is given the node's alignment operand; the second
+  // read is passed 0.
+  Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Align);
+  Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, 0);
+  Chain = Hi.getValue(1);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
 
 //===----------------------------------------------------------------------===//
 //  Operand Vector Splitting
diff --git a/llvm/test/CodeGen/X86/legalize-vaarg.ll b/llvm/test/CodeGen/X86/legalize-vaarg.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/legalize-vaarg.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s
+
+define <32 x i32> @test_large_vec_vaarg(i32 %n, ...) {
+; CHECK-LABEL: test_large_vec_vaarg:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    cmpl $24, %ecx
+; CHECK-NEXT:    jae .LBB0_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    addl $8, %ecx
+; CHECK-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jmp .LBB0_3
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    movq (%rsp), %rax
+; CHECK-NEXT:    addq $127, %rax
+; CHECK-NEXT:    andq $-128, %rax
+; CHECK-NEXT:    leaq 32(%rax), %rcx
+; CHECK-NEXT:    movq %rcx, (%rsp)
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    vmovaps (%rax), %ymm0
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    cmpl $24, %ecx
+; CHECK-NEXT:    jae .LBB0_5
+; CHECK-NEXT:  # %bb.4:
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    addl $8, %ecx
+; CHECK-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jmp .LBB0_6
+; CHECK-NEXT:  .LBB0_5:
+; CHECK-NEXT:    movq (%rsp), %rax
+; CHECK-NEXT:    movq %rax, %rcx
+; CHECK-NEXT:    addq $32, %rcx
+; CHECK-NEXT:    movq %rcx, (%rsp)
+; CHECK-NEXT:  .LBB0_6:
+; CHECK-NEXT:    vmovaps (%rax), %ymm1
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    cmpl $24, %ecx
+; CHECK-NEXT:    jae .LBB0_8
+; CHECK-NEXT:  # %bb.7:
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    addl $8, %ecx
+; CHECK-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    jmp .LBB0_9
+; CHECK-NEXT:  .LBB0_8:
+; CHECK-NEXT:    movq (%rsp), %rax
+; CHECK-NEXT:    movq %rax, %rcx
+; CHECK-NEXT:    addq $32, %rcx
+; CHECK-NEXT:    movq %rcx, (%rsp)
+; CHECK-NEXT:  .LBB0_9:
+; CHECK-NEXT:    vmovaps (%rax), %ymm2
+; CHECK-NEXT:    movl -{{[0-9]+}}(%rsp), %ecx
+; CHECK-NEXT:    cmpl $24, %ecx
+; CHECK-NEXT:    jae .LBB0_11
+; CHECK-NEXT:  # %bb.10:
+; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT:    addq %rcx, %rax
+; CHECK-NEXT:    addl $8, %ecx
+; CHECK-NEXT:    movl %ecx, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vmovaps (%rax), %ymm3
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_11:
+; CHECK-NEXT:    movq (%rsp), %rax
+; CHECK-NEXT:    movq %rax, %rcx
+; CHECK-NEXT:    addq $32, %rcx
+; CHECK-NEXT:    movq %rcx, (%rsp)
+; CHECK-NEXT:    vmovaps (%rax), %ymm3
+; CHECK-NEXT:    retq
+  %args = alloca i8*, align 4
+  %x = va_arg i8** %args, <32 x i32>
+  ret <32 x i32> %x
+}