Index: lib/Target/TargetMachine.cpp
===================================================================
--- lib/Target/TargetMachine.cpp
+++ lib/Target/TargetMachine.cpp
@@ -116,12 +116,24 @@
   if (GV && GV->isDSOLocal())
     return true;
 
-  // According to the llvm language reference, we should be able to just return
-  // false in here if we have a GV, as we know it is dso_preemptable.
-  // At this point in time, the various IR producers have not been transitioned
-  // to always produce a dso_local when it is possible to do so. As a result we
-  // still have some pre-dso_local logic in here to improve the quality of the
-  // generated code:
+  // If we are not supposed to use a PLT, we cannot assume that intrinsics are
+  // local since the linker can convert some direct access to access via plt.
+  if (M.getRtLibUseGOT() && GV == nullptr)
+    return false;
+
+  // According to the llvm language reference, we should be able to
+  // just return false in here if we have a GV, as we know it is
+  // dso_preemptable. At this point in time, the various IR producers
+  // have not been transitioned to always produce a dso_local when it
+  // is possible to do so.
+  // In the case of intrinsics, GV is null and there is nowhere to put
+  // dso_local. Returning false for those will produce worse code in some
+  // architectures. For example, on x86 the caller has to set ebx before calling
+  // a plt.
+  // As a result we still have some logic in here to improve the quality of the
+  // generated code.
+  // FIXME: Add a module level metadata for whether intrinsics should be assumed
+  // local.
 
   Reloc::Model RM = getRelocationModel();
   const Triple &TT = getTargetTriple();
@@ -137,27 +149,20 @@
   if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO()))
     return true;
 
-  // If GV is null we know that this is a call to an intrinsic. For ELF and
-  // MachO we don't need to assume those are local since the linker can trivially
-  // convert a call to a PLT to a direct call if the target (in the runtime
-  // library) turns out to be local.
-  if (!GV)
-    return false;
-
   // Most PIC code sequences that assume that a symbol is local cannot
   // produce a 0 if it turns out the symbol is undefined. While this
   // is ABI and relocation dependent, it seems worth it to handle it
   // here.
-  if (isPositionIndependent() && GV->hasExternalWeakLinkage())
+  if (GV && isPositionIndependent() && GV->hasExternalWeakLinkage())
     return false;
 
-  if (!GV->hasDefaultVisibility())
+  if (GV && !GV->hasDefaultVisibility())
     return true;
 
   if (TT.isOSBinFormatMachO()) {
     if (RM == Reloc::Static)
       return true;
-    return GV->isStrongDefinitionForLinker();
+    return GV && GV->isStrongDefinitionForLinker();
   }
 
   assert(TT.isOSBinFormatELF());
@@ -167,19 +172,19 @@
       RM == Reloc::Static || M.getPIELevel() != PIELevel::Default;
   if (IsExecutable) {
     // If the symbol is defined, it cannot be preempted.
-    if (!GV->isDeclarationForLinker())
+    if (GV && !GV->isDeclarationForLinker())
       return true;
 
     // A symbol marked nonlazybind should not be accessed with a plt. If the
    // symbol turns out to be external, the linker will convert a direct
    // access to an access via the plt, so don't assume it is local.
-    const Function *F = dyn_cast<Function>(GV);
+    const Function *F = dyn_cast_or_null<Function>(GV);
     if (F && F->hasFnAttribute(Attribute::NonLazyBind))
       return false;
 
-    bool IsTLS = GV->isThreadLocal();
+    bool IsTLS = GV && GV->isThreadLocal();
    bool IsAccessViaCopyRelocs =
-        Options.MCOptions.MCPIECopyRelocations && isa<GlobalVariable>(GV);
+        GV && Options.MCOptions.MCPIECopyRelocations && isa<GlobalVariable>(GV);
    Triple::ArchType Arch = TT.getArch();
    bool IsPPC =
        Arch == Triple::ppc || Arch == Triple::ppc64 || Arch == Triple::ppc64le;
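
[Note, illustrative rather than part of the patch: the new M.getRtLibUseGOT() check reads the "RtLibUseGOT" module flag, which frontends emit for -fno-plt style builds. A minimal sketch of an input that would now take the early return-false path; the function name and module below are invented for illustration:

  define void @zero256(i8* %p) {
    call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 256, i1 false)
    ret void
  }
  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)

  !llvm.module.flags = !{!0}
  !0 = !{i32 7, !"RtLibUseGOT", i32 1}

With the flag set there is no GlobalValue on which to put dso_local for the memset libcall the intrinsic lowers to, so shouldAssumeDSOLocal() returns false and PIC code reaches the routine through the GOT instead of assuming a direct call.]
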
Index: test/CodeGen/X86/finite-libcalls.ll
===================================================================
--- test/CodeGen/X86/finite-libcalls.ll
+++ test/CodeGen/X86/finite-libcalls.ll
@@ -9,7 +9,7 @@
 define float @exp_f32(float %x) #0 {
 ; GNU-LABEL: exp_f32:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __expf_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __expf_finite # TAILCALL
 ;
 ; WIN-LABEL: exp_f32:
 ; WIN: # %bb.0:
@@ -25,7 +25,7 @@
 define double @exp_f64(double %x) #0 {
 ; GNU-LABEL: exp_f64:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __exp_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __exp_finite # TAILCALL
 ;
 ; WIN-LABEL: exp_f64:
 ; WIN: # %bb.0:
@@ -72,7 +72,7 @@
 define float @exp2_f32(float %x) #0 {
 ; GNU-LABEL: exp2_f32:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __exp2f_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __exp2f_finite # TAILCALL
 ;
 ; WIN-LABEL: exp2_f32:
 ; WIN: # %bb.0:
@@ -88,7 +88,7 @@
 define double @exp2_f64(double %x) #0 {
 ; GNU-LABEL: exp2_f64:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __exp2_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __exp2_finite # TAILCALL
 ;
 ; WIN-LABEL: exp2_f64:
 ; WIN: # %bb.0:
@@ -135,7 +135,7 @@
 define float @log_f32(float %x) #0 {
 ; GNU-LABEL: log_f32:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __logf_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __logf_finite # TAILCALL
 ;
 ; WIN-LABEL: log_f32:
 ; WIN: # %bb.0:
@@ -151,7 +151,7 @@
 define double @log_f64(double %x) #0 {
 ; GNU-LABEL: log_f64:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __log_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __log_finite # TAILCALL
 ;
 ; WIN-LABEL: log_f64:
 ; WIN: # %bb.0:
@@ -198,7 +198,7 @@
 define float @log2_f32(float %x) #0 {
 ; GNU-LABEL: log2_f32:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __log2f_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __log2f_finite # TAILCALL
 ;
 ; WIN-LABEL: log2_f32:
 ; WIN: # %bb.0:
@@ -214,7 +214,7 @@
 define double @log2_f64(double %x) #0 {
 ; GNU-LABEL: log2_f64:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __log2_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __log2_finite # TAILCALL
 ;
 ; WIN-LABEL: log2_f64:
 ; WIN: # %bb.0:
@@ -261,7 +261,7 @@
 define float @log10_f32(float %x) #0 {
 ; GNU-LABEL: log10_f32:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __log10f_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __log10f_finite # TAILCALL
 ;
 ; WIN-LABEL: log10_f32:
 ; WIN: # %bb.0:
@@ -277,7 +277,7 @@
 define double @log10_f64(double %x) #0 {
 ; GNU-LABEL: log10_f64:
 ; GNU: # %bb.0:
-; GNU-NEXT: jmp __log10_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __log10_finite # TAILCALL
 ;
 ; WIN-LABEL: log10_f64:
 ; WIN: # %bb.0:
@@ -325,7 +325,7 @@
 ; GNU-LABEL: pow_f32:
 ; GNU: # %bb.0:
 ; GNU-NEXT: movaps %xmm0, %xmm1
-; GNU-NEXT: jmp __powf_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __powf_finite # TAILCALL
 ;
 ; WIN-LABEL: pow_f32:
 ; WIN: # %bb.0:
@@ -344,7 +344,7 @@
 ; GNU-LABEL: pow_f64:
 ; GNU: # %bb.0:
 ; GNU-NEXT: movaps %xmm0, %xmm1
-; GNU-NEXT: jmp __pow_finite@PLT # TAILCALL
+; GNU-NEXT: jmp __pow_finite # TAILCALL
 ;
 ; WIN-LABEL: pow_f64:
 ; WIN: # %bb.0:
Index: test/CodeGen/X86/fmaxnum.ll
===================================================================
--- test/CodeGen/X86/fmaxnum.ll
+++ test/CodeGen/X86/fmaxnum.ll
@@ -43,7 +43,7 @@
 define float @test_fmaxf_minsize(float %x, float %y) minsize {
 ; CHECK-LABEL: test_fmaxf_minsize:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: jmp fmaxf@PLT # TAILCALL
+; CHECK-NEXT: jmp fmaxf # TAILCALL
   %z = call float @fmaxf(float %x, float %y) readnone
   ret float %z
 }
@@ -82,7 +82,7 @@
 ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fstpt (%rsp)
-; CHECK-NEXT: callq fmaxl@PLT
+; CHECK-NEXT: callq fmaxl
 ; CHECK-NEXT: addq $40, %rsp
 ; CHECK-NEXT: retq
   %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
@@ -144,7 +144,7 @@
 ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fstpt (%rsp)
-; CHECK-NEXT: callq fmaxl@PLT
+; CHECK-NEXT: callq fmaxl
 ; CHECK-NEXT: addq $40, %rsp
 ; CHECK-NEXT: retq
   %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
Index: test/CodeGen/X86/fminnum.ll
===================================================================
--- test/CodeGen/X86/fminnum.ll
+++ test/CodeGen/X86/fminnum.ll
@@ -74,7 +74,7 @@
 ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fstpt (%rsp)
-; CHECK-NEXT: callq fminl@PLT
+; CHECK-NEXT: callq fminl
 ; CHECK-NEXT: addq $40, %rsp
 ; CHECK-NEXT: retq
   %z = call x86_fp80 @fminl(x86_fp80 %x, x86_fp80 %y) readnone
@@ -136,7 +136,7 @@
 ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fstpt {{[0-9]+}}(%rsp)
 ; CHECK-NEXT: fstpt (%rsp)
-; CHECK-NEXT: callq fminl@PLT
+; CHECK-NEXT: callq fminl
 ; CHECK-NEXT: addq $40, %rsp
 ; CHECK-NEXT: retq
   %z = call x86_fp80 @llvm.minnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
Index: test/CodeGen/X86/fp-cvt.ll
===================================================================
--- test/CodeGen/X86/fp-cvt.ll
+++ test/CodeGen/X86/fp-cvt.ll
@@ -924,7 +924,7 @@
 ; X86-NEXT: subl $12, %esp
 ; X86-NEXT: fldt {{[0-9]+}}(%esp)
 ; X86-NEXT: fstpt (%esp)
-; X86-NEXT: calll floorl@PLT
+; X86-NEXT: calll floorl
 ; X86-NEXT: addl $12, %esp
 ; X86-NEXT: retl
 ;
@@ -933,7 +933,7 @@
 ; X64-NEXT: subq $24, %rsp
 ; X64-NEXT: fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT: fstpt (%rsp)
-; X64-NEXT: callq floorl@PLT
+; X64-NEXT: callq floorl
 ; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: retq
   %1 = call x86_fp80 @llvm.floor.f80(x86_fp80 %a0)
@@ -947,7 +947,7 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: fldt (%eax)
 ; X86-NEXT: fstpt (%esp)
-; X86-NEXT: calll floorl@PLT
+; X86-NEXT: calll floorl
 ; X86-NEXT: addl $12, %esp
 ; X86-NEXT: retl
 ;
@@ -956,7 +956,7 @@
 ; X64-NEXT: subq $24, %rsp
 ; X64-NEXT: fldt (%rdi)
 ; X64-NEXT: fstpt (%rsp)
-; X64-NEXT: callq floorl@PLT
+; X64-NEXT: callq floorl
 ; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: retq
   %1 = load x86_fp80, x86_fp80 *%a0
@@ -976,7 +976,7 @@
 ; X86-NEXT: subl $12, %esp
 ; X86-NEXT: fldt {{[0-9]+}}(%esp)
 ; X86-NEXT: fstpt (%esp)
-; X86-NEXT: calll ceill@PLT
+; X86-NEXT: calll ceill
 ; X86-NEXT: addl $12, %esp
 ; X86-NEXT: retl
 ;
@@ -985,7 +985,7 @@
 ; X64-NEXT: subq $24, %rsp
 ; X64-NEXT: fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT: fstpt (%rsp)
-; X64-NEXT: callq ceill@PLT
+; X64-NEXT: callq ceill
 ; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: retq
   %1 = call x86_fp80 @llvm.ceil.f80(x86_fp80 %a0)
@@ -999,7 +999,7 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: fldt (%eax)
 ; X86-NEXT: fstpt (%esp)
-; X86-NEXT: calll ceill@PLT
+; X86-NEXT: calll ceill
 ; X86-NEXT: addl $12, %esp
 ; X86-NEXT: retl
 ;
@@ -1008,7 +1008,7 @@
 ; X64-NEXT: subq $24, %rsp
 ; X64-NEXT: fldt (%rdi)
 ; X64-NEXT: fstpt (%rsp)
-; X64-NEXT: callq ceill@PLT
+; X64-NEXT: callq ceill
 ; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: retq
   %1 = load x86_fp80, x86_fp80 *%a0
@@ -1028,7 +1028,7 @@
 ; X86-NEXT: subl $12, %esp
 ; X86-NEXT: fldt {{[0-9]+}}(%esp)
 ; X86-NEXT: fstpt (%esp)
-; X86-NEXT: calll truncl@PLT
+; X86-NEXT: calll truncl
 ; X86-NEXT: addl $12, %esp
 ; X86-NEXT: retl
 ;
@@ -1037,7 +1037,7 @@
 ; X64-NEXT: subq $24, %rsp
 ; X64-NEXT: fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT: fstpt (%rsp)
-; X64-NEXT: callq truncl@PLT
+; X64-NEXT: callq truncl
 ; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: retq
   %1 = call x86_fp80 @llvm.trunc.f80(x86_fp80 %a0)
@@ -1051,7 +1051,7 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: fldt (%eax)
 ; X86-NEXT: fstpt (%esp)
-; X86-NEXT: calll truncl@PLT
+; X86-NEXT: calll truncl
 ; X86-NEXT: addl $12, %esp
 ; X86-NEXT: retl
 ;
@@ -1060,7 +1060,7 @@
 ; X64-NEXT: subq $24, %rsp
 ; X64-NEXT: fldt (%rdi)
 ; X64-NEXT: fstpt (%rsp)
-; X64-NEXT: callq truncl@PLT
+; X64-NEXT: callq truncl
 ; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: retq
   %1 = load x86_fp80, x86_fp80 *%a0
@@ -1080,7 +1080,7 @@
 ; X86-NEXT: subl $12, %esp
 ; X86-NEXT: fldt {{[0-9]+}}(%esp)
 ; X86-NEXT: fstpt (%esp)
-; X86-NEXT: calll rintl@PLT
+; X86-NEXT: calll rintl
 ; X86-NEXT: addl $12, %esp
 ; X86-NEXT: retl
 ;
@@ -1089,7 +1089,7 @@
 ; X64-NEXT: subq $24, %rsp
 ; X64-NEXT: fldt {{[0-9]+}}(%rsp)
 ; X64-NEXT: fstpt (%rsp)
-; X64-NEXT: callq rintl@PLT
+; X64-NEXT: callq rintl
 ; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: retq
   %1 = call x86_fp80 @llvm.rint.f80(x86_fp80 %a0)
@@ -1103,7 +1103,7 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: fldt (%eax)
 ; X86-NEXT: fstpt (%esp)
-; X86-NEXT: calll rintl@PLT
+; X86-NEXT: calll rintl
 ; X86-NEXT: addl $12, %esp
 ; X86-NEXT: retl
 ;
@@ -1112,7 +1112,7 @@
 ; X64-NEXT: subq $24, %rsp
 ; X64-NEXT: fldt (%rdi)
 ; X64-NEXT: fstpt (%rsp)
-; X64-NEXT: callq rintl@PLT
+; X64-NEXT: callq rintl
 ; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: retq
   %1 = load x86_fp80, x86_fp80 *%a0
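
[Note, illustrative rather than part of the patch: the CHECK-line churn in these test files is mechanical; the assertions are autogenerated, so after rebuilding llc the same updates can be reproduced with the in-tree helper, along the lines of

  utils/update_llc_test_checks.py --llc-binary=<build>/bin/llc test/CodeGen/X86/fp-cvt.ll

where the exact path and flags depend on the checkout and build directory.]
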
Index: test/CodeGen/X86/fp-intrinsics.ll
===================================================================
--- test/CodeGen/X86/fp-intrinsics.ll
+++ test/CodeGen/X86/fp-intrinsics.ll
@@ -245,7 +245,7 @@
 ; Verify that fma(3.5) isn't simplified when the rounding mode is
 ; unknown.
 ; CHECK-LABEL: f17
-; FMACALL32: jmp fmaf@PLT # TAILCALL
+; FMACALL32: jmp fmaf # TAILCALL
 ; FMA32: vfmadd213ss
 define float @f17() {
 entry:
@@ -261,7 +261,7 @@
 ; Verify that fma(42.1) isn't simplified when the rounding mode is
 ; unknown.
 ; CHECK-LABEL: f18
-; FMACALL64: jmp fma@PLT # TAILCALL
+; FMACALL64: jmp fma # TAILCALL
 ; FMA64: vfmadd213sd
 define double @f18() {
 entry:
Index: test/CodeGen/X86/half.ll
===================================================================
--- test/CodeGen/X86/half.ll
+++ test/CodeGen/X86/half.ll
@@ -75,7 +75,7 @@
 ; CHECK-LIBCALL-LABEL: test_extend32:
 ; CHECK-LIBCALL: # %bb.0:
 ; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
-; CHECK-LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL
+; CHECK-LIBCALL-NEXT: jmp __gnu_h2f_ieee # TAILCALL
 ;
 ; BWON-F16C-LABEL: test_extend32:
 ; BWON-F16C: # %bb.0:
Index: test/CodeGen/X86/memset-nonzero.ll
===================================================================
--- test/CodeGen/X86/memset-nonzero.ll
+++ test/CodeGen/X86/memset-nonzero.ll
@@ -398,7 +398,7 @@
 ; SSE-LABEL: memset_256_nonconst_bytes:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movl $256, %edx # imm = 0x100
-; SSE-NEXT: jmp memset@PLT # TAILCALL
+; SSE-NEXT: jmp memset # TAILCALL
 ;
 ; SSE2FAST-LABEL: memset_256_nonconst_bytes:
 ; SSE2FAST: # %bb.0:
Index: test/CodeGen/X86/negative-sin.ll
===================================================================
--- test/CodeGen/X86/negative-sin.ll
+++ test/CodeGen/X86/negative-sin.ll
@@ -28,7 +28,7 @@
 define double @fast(double %e) nounwind {
 ; CHECK-LABEL: fast:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: jmp sin@PLT # TAILCALL
+; CHECK-NEXT: jmp sin # TAILCALL
   %f = fsub fast double 0.0, %e
   %g = call double @sin(double %f) readonly
   %h = fsub fast double 0.0, %g
@@ -40,7 +40,7 @@
 define double @nsz(double %e) nounwind {
 ; CHECK-LABEL: nsz:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: jmp sin@PLT # TAILCALL
+; CHECK-NEXT: jmp sin # TAILCALL
   %f = fsub nsz double 0.0, %e
   %g = call double @sin(double %f) readonly
   %h = fsub nsz double 0.0, %g
@@ -88,7 +88,7 @@
 define double @fn_attr(double %e) nounwind #0 {
 ; CHECK-LABEL: fn_attr:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: jmp sin@PLT # TAILCALL
+; CHECK-NEXT: jmp sin # TAILCALL
   %f = fsub double 0.0, %e
   %g = call double @sin(double %f) readonly
   %h = fsub double 0.0, %g
Index: test/CodeGen/X86/scalar-fp-to-i64.ll
===================================================================
--- test/CodeGen/X86/scalar-fp-to-i64.ll
+++ test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -1439,7 +1439,7 @@
 ; AVX512_32_LIN-NEXT: subl $28, %esp
 ; AVX512_32_LIN-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
 ; AVX512_32_LIN-NEXT: vmovups %xmm0, (%esp)
-; AVX512_32_LIN-NEXT: calll __fixunstfdi@PLT
+; AVX512_32_LIN-NEXT: calll __fixunstfdi
 ; AVX512_32_LIN-NEXT: addl $28, %esp
 ; AVX512_32_LIN-NEXT: retl
 ;
@@ -1453,7 +1453,7 @@
 ; AVX512_64_LIN-LABEL: t_to_u64:
 ; AVX512_64_LIN: # %bb.0:
 ; AVX512_64_LIN-NEXT: pushq %rax
-; AVX512_64_LIN-NEXT: callq __fixunstfdi@PLT
+; AVX512_64_LIN-NEXT: callq __fixunstfdi
 ; AVX512_64_LIN-NEXT: popq %rcx
 ; AVX512_64_LIN-NEXT: retq
 ;
@@ -1474,7 +1474,7 @@
 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
-; SSE3_32_LIN-NEXT: calll __fixunstfdi@PLT
+; SSE3_32_LIN-NEXT: calll __fixunstfdi
 ; SSE3_32_LIN-NEXT: addl $28, %esp
 ; SSE3_32_LIN-NEXT: retl
 ;
@@ -1488,7 +1488,7 @@
 ; SSE3_64_LIN-LABEL: t_to_u64:
 ; SSE3_64_LIN: # %bb.0:
 ; SSE3_64_LIN-NEXT: pushq %rax
-; SSE3_64_LIN-NEXT: callq __fixunstfdi@PLT
+; SSE3_64_LIN-NEXT: callq __fixunstfdi
 ; SSE3_64_LIN-NEXT: popq %rcx
 ; SSE3_64_LIN-NEXT: retq
 ;
@@ -1509,7 +1509,7 @@
 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT: calll __fixunstfdi@PLT
+; SSE2_32_LIN-NEXT: calll __fixunstfdi
 ; SSE2_32_LIN-NEXT: addl $28, %esp
 ; SSE2_32_LIN-NEXT: retl
 ;
@@ -1523,7 +1523,7 @@
 ; SSE2_64_LIN-LABEL: t_to_u64:
 ; SSE2_64_LIN: # %bb.0:
 ; SSE2_64_LIN-NEXT: pushq %rax
-; SSE2_64_LIN-NEXT: callq __fixunstfdi@PLT
+; SSE2_64_LIN-NEXT: callq __fixunstfdi
 ; SSE2_64_LIN-NEXT: popq %rcx
 ; SSE2_64_LIN-NEXT: retq
 ;
@@ -1544,7 +1544,7 @@
 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87_LIN-NEXT: calll __fixunstfdi@PLT
+; X87_LIN-NEXT: calll __fixunstfdi
 ; X87_LIN-NEXT: addl $28, %esp
 ; X87_LIN-NEXT: retl
   %r = fptoui fp128 %a to i64
@@ -1566,7 +1566,7 @@
 ; AVX512_32_LIN-NEXT: subl $28, %esp
 ; AVX512_32_LIN-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
 ; AVX512_32_LIN-NEXT: vmovups %xmm0, (%esp)
-; AVX512_32_LIN-NEXT: calll __fixtfdi@PLT
+; AVX512_32_LIN-NEXT: calll __fixtfdi
 ; AVX512_32_LIN-NEXT: addl $28, %esp
 ; AVX512_32_LIN-NEXT: retl
 ;
@@ -1580,7 +1580,7 @@
 ; AVX512_64_LIN-LABEL: t_to_s64:
 ; AVX512_64_LIN: # %bb.0:
 ; AVX512_64_LIN-NEXT: pushq %rax
-; AVX512_64_LIN-NEXT: callq __fixtfdi@PLT
+; AVX512_64_LIN-NEXT: callq __fixtfdi
 ; AVX512_64_LIN-NEXT: popq %rcx
 ; AVX512_64_LIN-NEXT: retq
 ;
@@ -1601,7 +1601,7 @@
 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; SSE3_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
-; SSE3_32_LIN-NEXT: calll __fixtfdi@PLT
+; SSE3_32_LIN-NEXT: calll __fixtfdi
 ; SSE3_32_LIN-NEXT: addl $28, %esp
 ; SSE3_32_LIN-NEXT: retl
 ;
@@ -1615,7 +1615,7 @@
 ; SSE3_64_LIN-LABEL: t_to_s64:
 ; SSE3_64_LIN: # %bb.0:
 ; SSE3_64_LIN-NEXT: pushq %rax
-; SSE3_64_LIN-NEXT: callq __fixtfdi@PLT
+; SSE3_64_LIN-NEXT: callq __fixtfdi
 ; SSE3_64_LIN-NEXT: popq %rcx
 ; SSE3_64_LIN-NEXT: retq
 ;
@@ -1636,7 +1636,7 @@
 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT: pushl {{[0-9]+}}(%esp)
-; SSE2_32_LIN-NEXT: calll __fixtfdi@PLT
+; SSE2_32_LIN-NEXT: calll __fixtfdi
 ; SSE2_32_LIN-NEXT: addl $28, %esp
 ; SSE2_32_LIN-NEXT: retl
 ;
@@ -1650,7 +1650,7 @@
 ; SSE2_64_LIN-LABEL: t_to_s64:
 ; SSE2_64_LIN: # %bb.0:
 ; SSE2_64_LIN-NEXT: pushq %rax
-; SSE2_64_LIN-NEXT: callq __fixtfdi@PLT
+; SSE2_64_LIN-NEXT: callq __fixtfdi
 ; SSE2_64_LIN-NEXT: popq %rcx
 ; SSE2_64_LIN-NEXT: retq
 ;
@@ -1671,7 +1671,7 @@
 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp)
 ; X87_LIN-NEXT: pushl {{[0-9]+}}(%esp)
-; X87_LIN-NEXT: calll __fixtfdi@PLT
+; X87_LIN-NEXT: calll __fixtfdi
 ; X87_LIN-NEXT: addl $28, %esp
 ; X87_LIN-NEXT: retl
   %r = fptosi fp128 %a to i64
Index: test/CodeGen/X86/vector-half-conversions.ll
===================================================================
--- test/CodeGen/X86/vector-half-conversions.ll
+++ test/CodeGen/X86/vector-half-conversions.ll
@@ -3010,7 +3010,7 @@
 define i16 @cvt_f64_to_i16(double %a0) nounwind {
 ; ALL-LABEL: cvt_f64_to_i16:
 ; ALL: # %bb.0:
-; ALL-NEXT: jmp __truncdfhf2@PLT # TAILCALL
+; ALL-NEXT: jmp __truncdfhf2 # TAILCALL
   %1 = fptrunc double %a0 to half
   %2 = bitcast half %1 to i16
   ret i16 %2
@@ -3023,11 +3023,11 @@
 ; ALL-NEXT: subq $16, %rsp
 ; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; ALL-NEXT: callq __truncdfhf2@PLT
+; ALL-NEXT: callq __truncdfhf2
 ; ALL-NEXT: movl %eax, %ebx
 ; ALL-NEXT: shll $16, %ebx
 ; ALL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; ALL-NEXT: callq __truncdfhf2@PLT
+; ALL-NEXT: callq __truncdfhf2
 ; ALL-NEXT: movzwl %ax, %eax
 ; ALL-NEXT: orl %ebx, %eax
 ; ALL-NEXT: vmovd %eax, %xmm0
@@ -3048,13 +3048,13 @@
 ; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %r14d
 ; AVX1-NEXT: orl %ebx, %r14d
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
@@ -3062,11 +3062,11 @@
 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %eax
 ; AVX1-NEXT: orl %ebx, %eax
 ; AVX1-NEXT: shlq $32, %rax
@@ -3085,13 +3085,13 @@
 ; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebx
 ; AVX2-NEXT: shll $16, %ebx
 ; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %r14d
 ; AVX2-NEXT: orl %ebx, %r14d
 ; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3099,11 +3099,11 @@
 ; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebx
 ; AVX2-NEXT: shll $16, %ebx
 ; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %eax
 ; AVX2-NEXT: orl %ebx, %eax
 ; AVX2-NEXT: shlq $32, %rax
@@ -3122,13 +3122,13 @@
 ; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebx
 ; AVX512-NEXT: shll $16, %ebx
 ; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %r14d
 ; AVX512-NEXT: orl %ebx, %r14d
 ; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3136,11 +3136,11 @@
 ; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebx
 ; AVX512-NEXT: shll $16, %ebx
 ; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %eax
 ; AVX512-NEXT: orl %ebx, %eax
 ; AVX512-NEXT: shlq $32, %rax
@@ -3164,13 +3164,13 @@
 ; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %r14d
 ; AVX1-NEXT: orl %ebx, %r14d
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
@@ -3178,11 +3178,11 @@
 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %eax
 ; AVX1-NEXT: orl %ebx, %eax
 ; AVX1-NEXT: shlq $32, %rax
@@ -3202,13 +3202,13 @@
 ; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebx
 ; AVX2-NEXT: shll $16, %ebx
 ; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %r14d
 ; AVX2-NEXT: orl %ebx, %r14d
 ; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3216,11 +3216,11 @@
 ; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebx
 ; AVX2-NEXT: shll $16, %ebx
 ; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %eax
 ; AVX2-NEXT: orl %ebx, %eax
 ; AVX2-NEXT: shlq $32, %rax
@@ -3240,13 +3240,13 @@
 ; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebx
 ; AVX512-NEXT: shll $16, %ebx
 ; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %r14d
 ; AVX512-NEXT: orl %ebx, %r14d
 ; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3254,11 +3254,11 @@
 ; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebx
 ; AVX512-NEXT: shll $16, %ebx
 ; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %eax
 ; AVX512-NEXT: orl %ebx, %eax
 ; AVX512-NEXT: shlq $32, %rax
@@ -3284,13 +3284,13 @@
 ; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %r14d
 ; AVX1-NEXT: orl %ebx, %r14d
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
@@ -3298,11 +3298,11 @@
 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %eax
 ; AVX1-NEXT: orl %ebx, %eax
 ; AVX1-NEXT: shlq $32, %rax
@@ -3323,13 +3323,13 @@
 ; AVX2-SLOW-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX2-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-SLOW-NEXT: vzeroupper
-; AVX2-SLOW-NEXT: callq __truncdfhf2@PLT
+; AVX2-SLOW-NEXT: callq __truncdfhf2
 ; AVX2-SLOW-NEXT: movl %eax, %ebx
 ; AVX2-SLOW-NEXT: shll $16, %ebx
 ; AVX2-SLOW-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-SLOW-NEXT: vzeroupper
-; AVX2-SLOW-NEXT: callq __truncdfhf2@PLT
+; AVX2-SLOW-NEXT: callq __truncdfhf2
 ; AVX2-SLOW-NEXT: movzwl %ax, %r14d
 ; AVX2-SLOW-NEXT: orl %ebx, %r14d
 ; AVX2-SLOW-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3337,11 +3337,11 @@
 ; AVX2-SLOW-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX2-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-SLOW-NEXT: vzeroupper
-; AVX2-SLOW-NEXT: callq __truncdfhf2@PLT
+; AVX2-SLOW-NEXT: callq __truncdfhf2
 ; AVX2-SLOW-NEXT: movl %eax, %ebx
 ; AVX2-SLOW-NEXT: shll $16, %ebx
 ; AVX2-SLOW-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-SLOW-NEXT: callq __truncdfhf2@PLT
+; AVX2-SLOW-NEXT: callq __truncdfhf2
 ; AVX2-SLOW-NEXT: movzwl %ax, %eax
 ; AVX2-SLOW-NEXT: orl %ebx, %eax
 ; AVX2-SLOW-NEXT: shlq $32, %rax
@@ -3362,13 +3362,13 @@
 ; AVX2-FAST-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX2-FAST-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-FAST-NEXT: vzeroupper
-; AVX2-FAST-NEXT: callq __truncdfhf2@PLT
+; AVX2-FAST-NEXT: callq __truncdfhf2
 ; AVX2-FAST-NEXT: movl %eax, %ebx
 ; AVX2-FAST-NEXT: shll $16, %ebx
 ; AVX2-FAST-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-FAST-NEXT: vzeroupper
-; AVX2-FAST-NEXT: callq __truncdfhf2@PLT
+; AVX2-FAST-NEXT: callq __truncdfhf2
 ; AVX2-FAST-NEXT: movzwl %ax, %r14d
 ; AVX2-FAST-NEXT: orl %ebx, %r14d
 ; AVX2-FAST-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3376,11 +3376,11 @@
 ; AVX2-FAST-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX2-FAST-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-FAST-NEXT: vzeroupper
-; AVX2-FAST-NEXT: callq __truncdfhf2@PLT
+; AVX2-FAST-NEXT: callq __truncdfhf2
 ; AVX2-FAST-NEXT: movl %eax, %ebx
 ; AVX2-FAST-NEXT: shll $16, %ebx
 ; AVX2-FAST-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-FAST-NEXT: callq __truncdfhf2@PLT
+; AVX2-FAST-NEXT: callq __truncdfhf2
 ; AVX2-FAST-NEXT: movzwl %ax, %eax
 ; AVX2-FAST-NEXT: orl %ebx, %eax
 ; AVX2-FAST-NEXT: shlq $32, %rax
@@ -3400,13 +3400,13 @@
 ; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: callq __truncdfhf2
 ; AVX512F-NEXT: movl %eax, %ebx
 ; AVX512F-NEXT: shll $16, %ebx
 ; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: callq __truncdfhf2
 ; AVX512F-NEXT: movzwl %ax, %r14d
 ; AVX512F-NEXT: orl %ebx, %r14d
 ; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3414,11 +3414,11 @@
 ; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: callq __truncdfhf2
 ; AVX512F-NEXT: movl %eax, %ebx
 ; AVX512F-NEXT: shll $16, %ebx
 ; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: callq __truncdfhf2
 ; AVX512F-NEXT: movzwl %ax, %eax
 ; AVX512F-NEXT: orl %ebx, %eax
 ; AVX512F-NEXT: shlq $32, %rax
@@ -3439,13 +3439,13 @@
 ; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: callq __truncdfhf2@PLT
+; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movl %eax, %ebx
 ; AVX512VL-NEXT: shll $16, %ebx
 ; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: callq __truncdfhf2@PLT
+; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movzwl %ax, %r14d
 ; AVX512VL-NEXT: orl %ebx, %r14d
 ; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3453,11 +3453,11 @@
 ; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: callq __truncdfhf2@PLT
+; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movl %eax, %ebx
 ; AVX512VL-NEXT: shll $16, %ebx
 ; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512VL-NEXT: callq __truncdfhf2@PLT
+; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movzwl %ax, %eax
 ; AVX512VL-NEXT: orl %ebx, %eax
 ; AVX512VL-NEXT: shlq $32, %rax
@@ -3485,13 +3485,13 @@
 ; AVX1-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %r15d
 ; AVX1-NEXT: orl %ebx, %r15d
 ; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
@@ -3499,24 +3499,24 @@
 ; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %r14d
 ; AVX1-NEXT: orl %ebx, %r14d
 ; AVX1-NEXT: shlq $32, %r14
 ; AVX1-NEXT: orq %r15, %r14
 ; AVX1-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
 ; AVX1-NEXT: # xmm0 = mem[1,0]
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %r15d
 ; AVX1-NEXT: orl %ebx, %r15d
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
@@ -3524,11 +3524,11 @@
 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebx
 ; AVX1-NEXT: shll $16, %ebx
 ; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %eax
 ; AVX1-NEXT: orl %ebx, %eax
 ; AVX1-NEXT: shlq $32, %rax
@@ -3552,13 +3552,13 @@
 ; AVX2-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebx
 ; AVX2-NEXT: shll $16, %ebx
 ; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %r15d
 ; AVX2-NEXT: orl %ebx, %r15d
 ; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
@@ -3566,24 +3566,24 @@
 ; AVX2-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebx
 ; AVX2-NEXT: shll $16, %ebx
 ; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %r14d
 ; AVX2-NEXT: orl %ebx, %r14d
 ; AVX2-NEXT: shlq $32, %r14
 ; AVX2-NEXT: orq %r15, %r14
 ; AVX2-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
 ; AVX2-NEXT: # xmm0 = mem[1,0]
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebx
 ; AVX2-NEXT: shll $16, %ebx
 ; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %r15d
 ; AVX2-NEXT: orl %ebx, %r15d
 ; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3591,11 +3591,11 @@
 ; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebx
 ; AVX2-NEXT: shll $16, %ebx
 ; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %eax
 ; AVX2-NEXT: orl %ebx, %eax
 ; AVX2-NEXT: shlq $32, %rax
@@ -3618,13 +3618,13 @@
 ; AVX512-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebx
 ; AVX512-NEXT: shll $16, %ebx
 ; AVX512-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %r15d
 ; AVX512-NEXT: orl %ebx, %r15d
 ; AVX512-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload
@@ -3632,11 +3632,11 @@
 ; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebx
 ; AVX512-NEXT: shll $16, %ebx
 ; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %r14d
 ; AVX512-NEXT: orl %ebx, %r14d
 ; AVX512-NEXT: shlq $32, %r14
@@ -3646,13 +3646,13 @@
 ; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebx
 ; AVX512-NEXT: shll $16, %ebx
 ; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %r15d
 ; AVX512-NEXT: orl %ebx, %r15d
 ; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3660,11 +3660,11 @@
 ; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebx
 ; AVX512-NEXT: shll $16, %ebx
 ; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %eax
 ; AVX512-NEXT: orl %ebx, %eax
 ; AVX512-NEXT: shlq $32, %rax
@@ -3691,7 +3691,7 @@
 ; ALL: # %bb.0:
 ; ALL-NEXT: pushq %rbx
 ; ALL-NEXT: movq %rdi, %rbx
-; ALL-NEXT: callq __truncdfhf2@PLT
+; ALL-NEXT: callq __truncdfhf2
 ; ALL-NEXT: movw %ax, (%rbx)
 ; ALL-NEXT: popq %rbx
 ; ALL-NEXT: retq
@@ -3710,10 +3710,10 @@
 ; ALL-NEXT: movq %rdi, %rbx
 ; ALL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; ALL-NEXT: callq __truncdfhf2@PLT
+; ALL-NEXT: callq __truncdfhf2
 ; ALL-NEXT: movl %eax, %ebp
 ; ALL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; ALL-NEXT: callq __truncdfhf2@PLT
+; ALL-NEXT: callq __truncdfhf2
 ; ALL-NEXT: movw %ax, (%rbx)
 ; ALL-NEXT: movw %bp, 2(%rbx)
 ; ALL-NEXT: addq $24, %rsp
@@ -3738,22 +3738,22 @@
 ; AVX1-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %r14d
 ; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %r15d
 ; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebp
 ; AVX1-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movw %ax, 4(%rbx)
 ; AVX1-NEXT: movw %bp, (%rbx)
 ; AVX1-NEXT: movw %r15w, 6(%rbx)
@@ -3776,22 +3776,22 @@
 ; AVX2-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %r14d
 ; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX2-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %r15d
 ; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebp
 ; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movw %ax, 4(%rbx)
 ; AVX2-NEXT: movw %bp, (%rbx)
 ; AVX2-NEXT: movw %r15w, 6(%rbx)
@@ -3814,22 +3814,22 @@
 ; AVX512-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %r14d
 ; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %r15d
 ; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebp
 ; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movw %ax, 4(%rbx)
 ; AVX512-NEXT: movw %bp, (%rbx)
 ; AVX512-NEXT: movw %r15w, 6(%rbx)
@@ -3857,13 +3857,13 @@
 ; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebp
 ; AVX1-NEXT: shll $16, %ebp
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %ebx
 ; AVX1-NEXT: orl %ebp, %ebx
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
@@ -3871,11 +3871,11 @@
 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebp
 ; AVX1-NEXT: shll $16, %ebp
 ; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %eax
 ; AVX1-NEXT: orl %ebp, %eax
 ; AVX1-NEXT: shlq $32, %rax
@@ -3899,13 +3899,13 @@
 ; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebp
 ; AVX2-NEXT: shll $16, %ebp
 ; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %ebx
 ; AVX2-NEXT: orl %ebp, %ebx
 ; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3913,11 +3913,11 @@
 ; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebp
 ; AVX2-NEXT: shll $16, %ebp
 ; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movzwl %ax, %eax
 ; AVX2-NEXT: orl %ebp, %eax
 ; AVX2-NEXT: shlq $32, %rax
@@ -3941,13 +3941,13 @@
 ; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebp
 ; AVX512-NEXT: shll $16, %ebp
 ; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %ebx
 ; AVX512-NEXT: orl %ebp, %ebx
 ; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -3955,11 +3955,11 @@
 ; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebp
 ; AVX512-NEXT: shll $16, %ebp
 ; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movzwl %ax, %eax
 ; AVX512-NEXT: orl %ebp, %eax
 ; AVX512-NEXT: shlq $32, %rax
@@ -3990,13 +3990,13 @@
 ; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebp
 ; AVX1-NEXT: shll $16, %ebp
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %ebx
 ; AVX1-NEXT: orl %ebp, %ebx
 ; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
@@ -4004,11 +4004,11 @@
 ; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebp
 ; AVX1-NEXT: shll $16, %ebp
 ; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movzwl %ax, %eax
 ; AVX1-NEXT: orl %ebp, %eax
 ; AVX1-NEXT: shlq $32, %rax
@@ -4033,13 +4033,13 @@
 ; AVX2-SLOW-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX2-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-SLOW-NEXT: vzeroupper
-; AVX2-SLOW-NEXT: callq __truncdfhf2@PLT
+; AVX2-SLOW-NEXT: callq __truncdfhf2
 ; AVX2-SLOW-NEXT: movl %eax, %ebp
 ; AVX2-SLOW-NEXT: shll $16, %ebp
 ; AVX2-SLOW-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX2-SLOW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-SLOW-NEXT: vzeroupper
-; AVX2-SLOW-NEXT: callq __truncdfhf2@PLT
+; AVX2-SLOW-NEXT: callq __truncdfhf2
 ; AVX2-SLOW-NEXT: movzwl %ax, %ebx
 ; AVX2-SLOW-NEXT: orl %ebp, %ebx
 ; AVX2-SLOW-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -4047,11 +4047,11 @@
 ; AVX2-SLOW-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX2-SLOW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-SLOW-NEXT: vzeroupper
-; AVX2-SLOW-NEXT: callq __truncdfhf2@PLT
+; AVX2-SLOW-NEXT: callq __truncdfhf2
 ; AVX2-SLOW-NEXT: movl %eax, %ebp
 ; AVX2-SLOW-NEXT: shll $16, %ebp
 ; AVX2-SLOW-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-SLOW-NEXT: callq __truncdfhf2@PLT
+; AVX2-SLOW-NEXT: callq __truncdfhf2
 ; AVX2-SLOW-NEXT: movzwl %ax, %eax
 ; AVX2-SLOW-NEXT: orl %ebp, %eax
 ; AVX2-SLOW-NEXT: shlq $32, %rax
@@ -4076,13 +4076,13 @@
 ; AVX2-FAST-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX2-FAST-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-FAST-NEXT: vzeroupper
-; AVX2-FAST-NEXT: callq __truncdfhf2@PLT
+; AVX2-FAST-NEXT: callq __truncdfhf2
 ; AVX2-FAST-NEXT: movl %eax, %ebp
 ; AVX2-FAST-NEXT: shll $16, %ebp
 ; AVX2-FAST-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX2-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-FAST-NEXT: vzeroupper
-; AVX2-FAST-NEXT: callq __truncdfhf2@PLT
+; AVX2-FAST-NEXT: callq __truncdfhf2
 ; AVX2-FAST-NEXT: movzwl %ax, %ebx
 ; AVX2-FAST-NEXT: orl %ebp, %ebx
 ; AVX2-FAST-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -4090,11 +4090,11 @@
 ; AVX2-FAST-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX2-FAST-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-FAST-NEXT: vzeroupper
-; AVX2-FAST-NEXT: callq __truncdfhf2@PLT
+; AVX2-FAST-NEXT: callq __truncdfhf2
 ; AVX2-FAST-NEXT: movl %eax, %ebp
 ; AVX2-FAST-NEXT: shll $16, %ebp
 ; AVX2-FAST-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX2-FAST-NEXT: callq __truncdfhf2@PLT
+; AVX2-FAST-NEXT: callq __truncdfhf2
 ; AVX2-FAST-NEXT: movzwl %ax, %eax
 ; AVX2-FAST-NEXT: orl %ebp, %eax
 ; AVX2-FAST-NEXT: shlq $32, %rax
@@ -4118,13 +4118,13 @@
 ; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: callq __truncdfhf2
 ; AVX512F-NEXT: movl %eax, %ebp
 ; AVX512F-NEXT: shll $16, %ebp
 ; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: callq __truncdfhf2
 ; AVX512F-NEXT: movzwl %ax, %ebx
 ; AVX512F-NEXT: orl %ebp, %ebx
 ; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -4132,11 +4132,11 @@
 ; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: callq __truncdfhf2
 ; AVX512F-NEXT: movl %eax, %ebp
 ; AVX512F-NEXT: shll $16, %ebp
 ; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512F-NEXT: callq __truncdfhf2@PLT
+; AVX512F-NEXT: callq __truncdfhf2
 ; AVX512F-NEXT: movzwl %ax, %eax
 ; AVX512F-NEXT: orl %ebp, %eax
 ; AVX512F-NEXT: shlq $32, %rax
@@ -4161,13 +4161,13 @@
 ; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: callq __truncdfhf2@PLT
+; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movl %eax, %ebp
 ; AVX512VL-NEXT: shll $16, %ebp
 ; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
 ; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: callq __truncdfhf2@PLT
+; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movzwl %ax, %ebx
 ; AVX512VL-NEXT: orl %ebp, %ebx
 ; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
@@ -4175,11 +4175,11 @@
 ; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
 ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: callq __truncdfhf2@PLT
+; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movl %eax, %ebp
 ; AVX512VL-NEXT: shll $16, %ebp
 ; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512VL-NEXT: callq __truncdfhf2@PLT
+; AVX512VL-NEXT: callq __truncdfhf2
 ; AVX512VL-NEXT: movzwl %ax, %eax
 ; AVX512VL-NEXT: orl %ebp, %eax
 ; AVX512VL-NEXT: shlq $32, %rax
@@ -4214,41 +4214,41 @@
 ; AVX1-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
 ; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
 ; AVX1-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
 ; AVX1-NEXT: # xmm0 = mem[1,0]
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %r12d
 ; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %r13d
 ; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %ebp
 ; AVX1-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %r14d
 ; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movl %eax, %r15d
 ; AVX1-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX1-NEXT: callq __truncdfhf2@PLT
+; AVX1-NEXT: callq __truncdfhf2
 ; AVX1-NEXT: movw %ax, 12(%rbx)
 ; AVX1-NEXT: movw %r15w, 8(%rbx)
 ; AVX1-NEXT: movw %r14w, 4(%rbx)
@@ -4282,41 +4282,41 @@
 ; AVX2-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
 ; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX2-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
 ; AVX2-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
 ; AVX2-NEXT: # xmm0 = mem[1,0]
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %r12d
 ; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX2-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %r13d
 ; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %ebp
 ; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %r14d
 ; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movl %eax, %r15d
 ; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX2-NEXT: callq __truncdfhf2@PLT
+; AVX2-NEXT: callq __truncdfhf2
 ; AVX2-NEXT: movw %ax, 12(%rbx)
 ; AVX2-NEXT: movw %r15w, 8(%rbx)
 ; AVX2-NEXT: movw %r14w, 4(%rbx)
@@ -4349,44 +4349,44 @@
 ; AVX512-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
 ; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
 ; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
 ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0
 ; AVX512-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %r12d
 ; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %r13d
 ; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %ebp
 ; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %r14d
 ; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
 ; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movl %eax, %r15d
 ; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
-; AVX512-NEXT: callq __truncdfhf2@PLT
+; AVX512-NEXT: callq __truncdfhf2
 ; AVX512-NEXT: movw %ax, 12(%rbx)
 ; AVX512-NEXT: movw %r15w, 8(%rbx)
 ; AVX512-NEXT: movw %r14w, 4(%rbx)
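
[Note, illustrative rather than part of the patch: the net effect is visible in the CHECK lines themselves. Calls to compiler-inserted runtime functions such as __truncdfhf2 or memset are again emitted as plain direct calls, and the linker only routes them through a PLT if the callee turns out to be preemptable. One way to spot-check a single case, assuming an llc built from this tree and mirroring the test's own RUN line (which may use different flags):

  llc -mtriple=x86_64-unknown-unknown < test/CodeGen/X86/vector-half-conversions.ll | grep __truncdfhf2]
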