Index: llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -54,6 +54,16 @@
 
 #define DEBUG_TYPE "statepoint-lowering"
 
+// Users may need statepoint stack slots to be at least N bits wide. If set,
+// a value narrower than N bits gets an N-bit integer slot. A spill leaves the
+// high bits of the slot undefined (the runtime is expected to extend the low
+// bits itself); a live-in value is zero-extended. 0 disables promotion.
+// TODO: This really should be a property of the caller's calling convention.
+static cl::opt<unsigned> PromoteSlotBitsInSelectionDAG(
+    "promote-slot-bits-in-selection-dag", cl::Hidden, cl::init(0),
+    cl::desc("Promote stack slots to at least the specified number of bits to"
+             " meet ABI requirements in SelectionDAG"));
+
 STATISTIC(NumSlotsAllocatedForStatepoints,
           "Number of stack slots allocated for statepoints");
 STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered");
@@ -109,6 +119,10 @@
 SDValue
 StatepointLoweringState::allocateStackSlot(EVT ValueType,
                                            SelectionDAGBuilder &Builder) {
+  assert((!PromoteSlotBitsInSelectionDAG ||
+          ValueType.getSizeInBits() >= PromoteSlotBitsInSelectionDAG) &&
+         "Invariant for downstream code!");
+
   NumSlotsAllocatedForStatepoints++;
   MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
 
@@ -157,6 +171,25 @@
   return SpillSlot;
 }
 
+/// Zero-extend \p Incoming to a PromoteSlotBitsInSelectionDAG-bit integer when
+/// promotion is enabled and the value is narrower; otherwise return it as is.
+static SDValue promoteValueIfNeededForSlot(SelectionDAGBuilder &Builder,
+                                           SDValue Incoming) {
+  EVT VTy = Incoming.getValueType();
+  if (!PromoteSlotBitsInSelectionDAG ||
+      VTy.getSizeInBits() >= PromoteSlotBitsInSelectionDAG)
+    return Incoming;
+  SelectionDAG &DAG = Builder.DAG;
+  SDLoc DL = Builder.getCurSDLoc();
+  if (!VTy.isInteger()) {
+    // Bitcast to the same-width integer type so that we can extend it.
+    VTy = EVT::getIntegerVT(*DAG.getContext(), VTy.getSizeInBits());
+    Incoming = DAG.getNode(ISD::BITCAST, DL, VTy, Incoming);
+  }
+  EVT VTp = EVT::getIntegerVT(*DAG.getContext(), PromoteSlotBitsInSelectionDAG);
+  return DAG.getZExtOrTrunc(Incoming, DL, VTp);
+}
+
 /// Utility function for reservePreviousStackSlotForValue. Tries to find
 /// stack slot index to which we have spilled value for previous statepoints.
 /// LookUpDepth specifies maximum DFS depth this function is allowed to look.
@@ -375,8 +408,13 @@
 
   // Emit new store if we didn't do it for this ptr before
   if (!Loc.getNode()) {
-    Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(),
-                                                       Builder);
+    EVT VTy = Incoming.getValueType();
+    if (PromoteSlotBitsInSelectionDAG &&
+        VTy.getSizeInBits() < PromoteSlotBitsInSelectionDAG)
+      VTy = EVT::getIntegerVT(*Builder.DAG.getContext(),
+                              PromoteSlotBitsInSelectionDAG);
+
+    Loc = Builder.StatepointLowering.allocateStackSlot(VTy, Builder);
     int Index = cast<FrameIndexSDNode>(Loc)->getIndex();
     // We use TargetFrameIndex so that isel will not select it into LEA
     Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
@@ -387,10 +425,17 @@
     // can consider allowing spills of smaller values to larger slots
     // (i.e. change the '==' in the assert below to a '>=').
     MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
-    assert((MFI.getObjectSize(Index) * 8) ==
-               (-8 & (7 + // Round up modulo 8.
-                      (int64_t)Incoming.getValueSizeInBits())) &&
+
+#ifndef NDEBUG
+    int64_t ExpectedSlotSize = (int64_t)Incoming.getValueSizeInBits();
+    if (PromoteSlotBitsInSelectionDAG)
+      ExpectedSlotSize =
+          std::max<int64_t>(PromoteSlotBitsInSelectionDAG,
+                            Incoming.getValueType().getSizeInBits());
+    ExpectedSlotSize = (ExpectedSlotSize + 7) & -8; // Round up modulo 8.
+    assert((MFI.getObjectSize(Index) * 8) == ExpectedSlotSize &&
            "Bad spill: stack slot does not match!");
+#endif
 
     // Note: Using the alignment of the spill slot (rather than the abi or
     // preferred alignment) is required for correctness when dealing with spill
@@ -474,6 +519,7 @@
     // of a late use so these values might be placed in registers which are
     // clobbered by the call. This is fine for live-in. For live-through
     // fix-up pass should be executed to force spilling of such registers.
+    Incoming = promoteValueIfNeededForSlot(Builder, Incoming);
     Ops.push_back(Incoming);
   } else {
     // Otherwise, locate a spill slot and explicitly spill it so it can be
Index: llvm/test/CodeGen/X86/statepoint-spill-slot-size-promotion-2.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/statepoint-spill-slot-size-promotion-2.ll
@@ -0,0 +1,24 @@
+; RUN: llc -O3 -promote-slot-bits-in-selection-dag=64 < %s
+; RUN: llc -O3 -promote-slot-bits-in-selection-dag=63 < %s
+; RUN: llc -O3 -promote-slot-bits-in-selection-dag=65 < %s
+; RUN: llc -O3 -promote-slot-bits-in-selection-dag=128 < %s
+;
+; XFAIL: *
+; Values 63, 65 and 128 cause 2 different crashes, showing that some values of
+; the option -promote-slot-bits-in-selection-dag are unsupported for live-in
+; deopt lowering, namely values that are:
+; - wider than 64 bits
+; - not a multiple of 8 bits
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+declare void @foo()
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 , i32 , void ()*, i32 , i32 , ...)
+
+define void @test(float %v1) gc "statepoint-example" { ; %v1 is a float, i.e. a non-integer value
+  %statepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 2, i32 0, i32 0)
+    "deopt-lowering"="live-in" [ "deopt" (float %v1) ] ; %v1 is a deopt operand lowered as live-in
+  ret void
+}
Index: llvm/test/CodeGen/X86/statepoint-spill-slot-size-promotion.ll
===================================================================
--- llvm/test/CodeGen/X86/statepoint-spill-slot-size-promotion.ll
+++ llvm/test/CodeGen/X86/statepoint-spill-slot-size-promotion.ll
@@ -1,7 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py then fixed by hand.
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNDEF
+; RUN: llc -verify-machineinstrs -promote-slot-bits-in-selection-dag=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-0
+; RUN: llc -verify-machineinstrs -promote-slot-bits-in-selection-dag=8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-8
+; RUN: llc -verify-machineinstrs -promote-slot-bits-in-selection-dag=9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-9
+; RUN: llc -verify-machineinstrs -promote-slot-bits-in-selection-dag=15 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-15
+; RUN: llc -verify-machineinstrs -promote-slot-bits-in-selection-dag=16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-16
+; RUN: llc -verify-machineinstrs -promote-slot-bits-in-selection-dag=32 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-32
+; RUN: llc -verify-machineinstrs -promote-slot-bits-in-selection-dag=64 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-64
+; RUN: llc -verify-machineinstrs -promote-slot-bits-in-selection-dag=128 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-128
 ;
 ; Test different type sizes of deop bundle operands.
+; Test that deopt value spill slots are extended to what is specified +; with -promote-slot-bits-in-selection-dag. ; target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-win64" @@ -11,35 +21,277 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: subq $32, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset %rbx, -16 -; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11 -; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: movzwl {{[0-9]+}}(%rsp), %edi -; CHECK-NEXT: movw %di, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %r11, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %r10, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: andb $3, %sil -; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) -; CHECK-NEXT: andb $127, %dl -; CHECK-NEXT: movb %dl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: andl $511, %r8d # imm = 0x1FF -; CHECK-NEXT: movw %r8w, {{[0-9]+}}(%rsp) -; CHECK-NEXT: andl $32767, %r9d # imm = 0x7FFF -; CHECK-NEXT: movw %r9w, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 -; CHECK-NEXT: callq *%rax -; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: addq $32, %rsp + +; CHECK-UNDEF-NEXT: subq $32, %rsp +; CHECK-UNDEF-NEXT: .cfi_def_cfa_offset 48 +; CHECK-UNDEF-NEXT: .cfi_offset %rbx, -16 +; CHECK-UNDEF-NEXT: movl %edi, %ebx +; CHECK-UNDEF-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-UNDEF-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-UNDEF-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-UNDEF-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-UNDEF-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: movq %r10, 
{{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: andb $3, %sil +; CHECK-UNDEF-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: movl %ebx, %eax +; CHECK-UNDEF-NEXT: andl $1, %eax +; CHECK-UNDEF-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: andb $127, %dl +; CHECK-UNDEF-NEXT: movb %dl, {{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: andl $511, %r8d # imm = 0x1FF +; CHECK-UNDEF-NEXT: movw %r8w, {{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: andl $32767, %r9d # imm = 0x7FFF +; CHECK-UNDEF-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; CHECK-UNDEF-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 +; CHECK-UNDEF-NEXT: callq *%rax +; CHECK-UNDEF-NEXT: .Ltmp0: +; CHECK-UNDEF-NEXT: movl %ebx, %eax +; CHECK-UNDEF-NEXT: addq $32, %rsp + +; CHECK-0-NEXT: subq $32, %rsp +; CHECK-0-NEXT: .cfi_def_cfa_offset 48 +; CHECK-0-NEXT: .cfi_offset %rbx, -16 +; CHECK-0-NEXT: movl %edi, %ebx +; CHECK-0-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-0-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-0-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-0-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-0-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: movq %r10, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: andb $3, %sil +; CHECK-0-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: movl %ebx, %eax +; CHECK-0-NEXT: andl $1, %eax +; CHECK-0-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: andb $127, %dl +; CHECK-0-NEXT: movb %dl, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: andl $511, %r8d # imm = 0x1FF +; CHECK-0-NEXT: movw %r8w, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: andl $32767, %r9d # imm = 0x7FFF +; CHECK-0-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; CHECK-0-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 +; CHECK-0-NEXT: callq *%rax +; CHECK-0-NEXT: .Ltmp0: +; CHECK-0-NEXT: movl %ebx, %eax +; CHECK-0-NEXT: addq $32, %rsp + +; CHECK-8-NEXT: subq $32, %rsp +; 
CHECK-8-NEXT: .cfi_def_cfa_offset 48 +; CHECK-8-NEXT: .cfi_offset %rbx, -16 +; CHECK-8-NEXT: movl %edi, %ebx +; CHECK-8-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-8-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-8-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-8-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-8-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: movq %r10, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: andb $3, %sil +; CHECK-8-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: movl %ebx, %eax +; CHECK-8-NEXT: andl $1, %eax +; CHECK-8-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: andb $127, %dl +; CHECK-8-NEXT: movb %dl, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: andl $511, %r8d # imm = 0x1FF +; CHECK-8-NEXT: movw %r8w, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: andl $32767, %r9d # imm = 0x7FFF +; CHECK-8-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; CHECK-8-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 +; CHECK-8-NEXT: callq *%rax +; CHECK-8-NEXT: .Ltmp0: +; CHECK-8-NEXT: movl %ebx, %eax +; CHECK-8-NEXT: addq $32, %rsp + +; CHECK-9-NEXT: subq $48, %rsp +; CHECK-9-NEXT: .cfi_def_cfa_offset 64 +; CHECK-9-NEXT: .cfi_offset %rbx, -16 +; CHECK-9-NEXT: movl %edi, %ebx +; CHECK-9-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-9-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-9-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-9-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-9-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: movq %r10, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: andb $3, %sil +; CHECK-9-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: movl %ebx, %eax +; CHECK-9-NEXT: andl $1, %eax +; CHECK-9-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: andb $127, %dl +; CHECK-9-NEXT: movb %dl, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: andl $511, %r8d # 
imm = 0x1FF +; CHECK-9-NEXT: movw %r8w, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: andl $32767, %r9d # imm = 0x7FFF +; CHECK-9-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; CHECK-9-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 +; CHECK-9-NEXT: callq *%rax +; CHECK-9-NEXT: .Ltmp0: +; CHECK-9-NEXT: movl %ebx, %eax +; CHECK-9-NEXT: addq $48, %rsp + +; CHECK-15-NEXT: subq $48, %rsp +; CHECK-15-NEXT: .cfi_def_cfa_offset 64 +; CHECK-15-NEXT: .cfi_offset %rbx, -16 +; CHECK-15-NEXT: movl %edi, %ebx +; CHECK-15-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-15-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-15-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-15-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-15-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: movq %r10, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: andb $3, %sil +; CHECK-15-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: movl %ebx, %eax +; CHECK-15-NEXT: andl $1, %eax +; CHECK-15-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: andb $127, %dl +; CHECK-15-NEXT: movb %dl, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: andl $511, %r8d # imm = 0x1FF +; CHECK-15-NEXT: movw %r8w, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: andl $32767, %r9d # imm = 0x7FFF +; CHECK-15-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; CHECK-15-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 +; CHECK-15-NEXT: callq *%rax +; CHECK-15-NEXT: .Ltmp0: +; CHECK-15-NEXT: movl %ebx, %eax +; CHECK-15-NEXT: addq $48, %rsp + +; CHECK-16-NEXT: subq $48, %rsp +; CHECK-16-NEXT: .cfi_def_cfa_offset 64 +; CHECK-16-NEXT: .cfi_offset %rbx, -16 +; CHECK-16-NEXT: movl %edi, %ebx +; CHECK-16-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-16-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-16-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-16-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-16-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-16-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; 
CHECK-16-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; CHECK-16-NEXT: movq %r10, {{[0-9]+}}(%rsp) +; CHECK-16-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-16-NEXT: andb $3, %sil +; CHECK-16-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-16-NEXT: movl %ebx, %eax +; CHECK-16-NEXT: andl $1, %eax +; CHECK-16-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-16-NEXT: andb $127, %dl +; CHECK-16-NEXT: movb %dl, {{[0-9]+}}(%rsp) +; CHECK-16-NEXT: andl $511, %r8d # imm = 0x1FF +; CHECK-16-NEXT: movw %r8w, {{[0-9]+}}(%rsp) +; CHECK-16-NEXT: andl $32767, %r9d # imm = 0x7FFF +; CHECK-16-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; CHECK-16-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 +; CHECK-16-NEXT: callq *%rax +; CHECK-16-NEXT: .Ltmp0: +; CHECK-16-NEXT: movl %ebx, %eax +; CHECK-16-NEXT: addq $48, %rsp + +; CHECK-32-NEXT: subq $48, %rsp +; CHECK-32-NEXT: .cfi_def_cfa_offset 64 +; CHECK-32-NEXT: .cfi_offset %rbx, -16 +; CHECK-32-NEXT: movl %edi, %ebx +; CHECK-32-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-32-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-32-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-32-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-32-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-32-NEXT: movl %eax, (%rsp) +; CHECK-32-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; CHECK-32-NEXT: movq %r10, {{[0-9]+}}(%rsp) +; CHECK-32-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-32-NEXT: andb $3, %sil +; CHECK-32-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-32-NEXT: movl %ebx, %eax +; CHECK-32-NEXT: andl $1, %eax +; CHECK-32-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-32-NEXT: andb $127, %dl +; CHECK-32-NEXT: movb %dl, {{[0-9]+}}(%rsp) +; CHECK-32-NEXT: andl $511, %r8d # imm = 0x1FF +; CHECK-32-NEXT: movw %r8w, {{[0-9]+}}(%rsp) +; CHECK-32-NEXT: andl $32767, %r9d # imm = 0x7FFF +; CHECK-32-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; CHECK-32-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 +; CHECK-32-NEXT: callq *%rax +; CHECK-32-NEXT: .Ltmp0: +; CHECK-32-NEXT: movl %ebx, %eax +; CHECK-32-NEXT: addq $48, %rsp + +; 
CHECK-64-NEXT: subq $80, %rsp +; CHECK-64-NEXT: .cfi_def_cfa_offset 96 +; CHECK-64-NEXT: .cfi_offset %rbx, -16 +; CHECK-64-NEXT: movl %edi, %ebx +; CHECK-64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-64-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-64-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-64-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-64-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; CHECK-64-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; CHECK-64-NEXT: movq %r10, (%rsp) +; CHECK-64-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-64-NEXT: andb $3, %sil +; CHECK-64-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-64-NEXT: movl %ebx, %eax +; CHECK-64-NEXT: andl $1, %eax +; CHECK-64-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-64-NEXT: andb $127, %dl +; CHECK-64-NEXT: movb %dl, {{[0-9]+}}(%rsp) +; CHECK-64-NEXT: andl $511, %r8d # imm = 0x1FF +; CHECK-64-NEXT: movw %r8w, {{[0-9]+}}(%rsp) +; CHECK-64-NEXT: andl $32767, %r9d # imm = 0x7FFF +; CHECK-64-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; CHECK-64-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 +; CHECK-64-NEXT: callq *%rax +; CHECK-64-NEXT: .Ltmp0: +; CHECK-64-NEXT: movl %ebx, %eax +; CHECK-64-NEXT: addq $80, %rsp + +; CHECK-128-NEXT: subq $160, %rsp +; CHECK-128-NEXT: .cfi_def_cfa_offset 176 +; CHECK-128-NEXT: .cfi_offset %rbx, -16 +; CHECK-128-NEXT: movl %edi, %ebx +; CHECK-128-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; CHECK-128-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; CHECK-128-NEXT: movl {{[0-9]+}}(%rsp), %eax +; CHECK-128-NEXT: movzwl {{[0-9]+}}(%rsp), %edi +; CHECK-128-NEXT: movw %di, {{[0-9]+}}(%rsp) +; CHECK-128-NEXT: movl %eax, {{[0-9]+}}(%rsp) +; CHECK-128-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; CHECK-128-NEXT: movq %r10, (%rsp) +; CHECK-128-NEXT: movb %cl, {{[0-9]+}}(%rsp) +; CHECK-128-NEXT: andb $3, %sil +; CHECK-128-NEXT: movb %sil, {{[0-9]+}}(%rsp) +; CHECK-128-NEXT: movl %ebx, %eax +; CHECK-128-NEXT: andl $1, %eax +; CHECK-128-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-128-NEXT: andb $127, %dl 
+; CHECK-128-NEXT: movb %dl, {{[0-9]+}}(%rsp) +; CHECK-128-NEXT: andl $511, %r8d # imm = 0x1FF +; CHECK-128-NEXT: movw %r8w, {{[0-9]+}}(%rsp) +; CHECK-128-NEXT: andl $32767, %r9d # imm = 0x7FFF +; CHECK-128-NEXT: movw %r9w, {{[0-9]+}}(%rsp) +; CHECK-128-NEXT: movabsq $140727162896504, %rax # imm = 0x7FFD988E0078 +; CHECK-128-NEXT: callq *%rax +; CHECK-128-NEXT: .Ltmp0: +; CHECK-128-NEXT: movl %ebx, %eax +; CHECK-128-NEXT: addq $160, %rsp + ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 8