Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp @@ -8389,6 +8389,11 @@ MI.getOpcode() == X86::PUSH64r)) return nullptr; + // Avoid partial register update stalls unless optimizing for size. + // TODO: we should block undef reg update as well. + if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) + return nullptr; + unsigned NumOps = MI.getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; @@ -8554,6 +8559,7 @@ // Unless optimizing for size, don't fold to avoid partial // register update stalls + // TODO: we should block undef reg update as well. if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) return nullptr; @@ -8752,6 +8758,7 @@ if (NoFusing) return nullptr; // Avoid partial register update stalls unless optimizing for size. + // TODO: we should block undef reg update as well. if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode())) return nullptr; Index: llvm/trunk/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll @@ -55,7 +55,8 @@ define double @single_to_double_rm(float* %x) { ; SSE-LABEL: single_to_double_rm: ; SSE: # BB#0: # %entry -; SSE-NEXT: cvtss2sd (%rdi), %xmm0 +; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cvtss2sd %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: single_to_double_rm: @@ -69,10 +70,28 @@ ret double %conv } +define double @single_to_double_rm_optsize(float* %x) optsize { +; SSE-LABEL: single_to_double_rm_optsize: +; SSE: # BB#0: # %entry +; SSE-NEXT: cvtss2sd (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: single_to_double_rm_optsize: +; AVX: # BB#0: # %entry +; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = load float, float* %x, align 4 + %conv = fpext float %0 to double + ret double %conv +} + define float @double_to_single_rm(double* %x) { ; SSE-LABEL: double_to_single_rm: ; SSE: # BB#0: # %entry -; SSE-NEXT: cvtsd2ss (%rdi), %xmm0 +; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-NEXT: cvtsd2ss %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: double_to_single_rm: @@ -85,3 +104,20 @@ %conv = fptrunc double %0 to float ret float %conv } + +define float @double_to_single_rm_optsize(double* %x) optsize { +; SSE-LABEL: double_to_single_rm_optsize: +; SSE: # BB#0: # %entry +; SSE-NEXT: cvtsd2ss (%rdi), %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: double_to_single_rm_optsize: +; AVX: # BB#0: # %entry +; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = load double, double* %x, align 8 + %conv = fptrunc double %0 to float + ret float %conv +} Index: llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll @@ -21,7 +21,8 @@ define double @long_to_double_rm(i64* %a) { ; SSE2-LABEL: long_to_double_rm: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0 +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: cvtsi2sdq %rax, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: long_to_double_rm: @@ -34,6 +35,22 @@ ret double %1 } +define double @long_to_double_rm_optsize(i64* %a) optsize { +; SSE2-LABEL: long_to_double_rm_optsize: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2sdq (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_double_rm_optsize: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = sitofp i64 %0 to double + ret double %1 +} + define float @long_to_float_rr(i64 %a) { ; SSE2-LABEL: long_to_float_rr: ; SSE2: # BB#0: # %entry @@ -52,7 +69,8 @@ define float @long_to_float_rm(i64* %a) { ; SSE2-LABEL: long_to_float_rm: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0 +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: long_to_float_rm: @@ -64,3 +82,19 @@ %1 = sitofp i64 %0 to float ret float %1 } + +define float @long_to_float_rm_optsize(i64* %a) optsize { +; SSE2-LABEL: long_to_float_rm_optsize: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2ssq (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: long_to_float_rm_optsize: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +entry: + %0 = load i64, i64* %a + %1 = sitofp i64 %0 to float + ret float %1 +} Index: llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll +++ llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll @@ -58,7 +58,8 @@ define double @int_to_double_rm(i32* %a) { ; SSE2-LABEL: int_to_double_rm: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0 +; SSE2-NEXT: movl (%rdi), %eax +; SSE2-NEXT: cvtsi2sdl %eax, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: int_to_double_rm: @@ -107,6 +108,58 @@ ret double %1 } +define double @int_to_double_rm_optsize(i32* %a) optsize { +; SSE2-LABEL: int_to_double_rm_optsize: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2sdl (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: int_to_double_rm_optsize: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_double_rm_optsize: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: .cfi_offset %ebp, -8 +; SSE2_X86-NEXT: movl %esp, %ebp +; SSE2_X86-NEXT: .cfi_def_cfa_register %ebp +; SSE2_X86-NEXT: andl $-8, %esp +; SSE2_X86-NEXT: subl $8, %esp +; SSE2_X86-NEXT: movl 8(%ebp), %eax +; SSE2_X86-NEXT: cvtsi2sdl (%eax), %xmm0 +; SSE2_X86-NEXT: movsd %xmm0, (%esp) +; SSE2_X86-NEXT: fldl (%esp) +; SSE2_X86-NEXT: movl %ebp, %esp +; SSE2_X86-NEXT: popl %ebp +; SSE2_X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_double_rm_optsize: +; AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %ebp +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: .cfi_offset %ebp, -8 +; AVX_X86-NEXT: movl %esp, %ebp +; AVX_X86-NEXT: .cfi_def_cfa_register %ebp +; AVX_X86-NEXT: andl $-8, %esp +; AVX_X86-NEXT: subl $8, %esp +; AVX_X86-NEXT: movl 8(%ebp), %eax +; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovsd %xmm0, (%esp) +; AVX_X86-NEXT: fldl (%esp) +; AVX_X86-NEXT: movl %ebp, %esp +; AVX_X86-NEXT: popl %ebp +; AVX_X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX_X86-NEXT: retl +entry: + %0 = load i32, i32* %a + %1 = sitofp i32 %0 to double + ret double %1 +} + define float @int_to_float_rr(i32 %a) { ; SSE2-LABEL: int_to_float_rr: ; SSE2: # BB#0: # %entry @@ -148,7 +201,8 @@ define float @int_to_float_rm(i32* %a) { ; SSE2-LABEL: int_to_float_rm: ; SSE2: # BB#0: # %entry -; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0 +; SSE2-NEXT: movl (%rdi), %eax +; SSE2-NEXT: cvtsi2ssl %eax, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: int_to_float_rm: @@ -184,3 +238,43 @@ %1 = sitofp i32 %0 to float ret float %1 } + +define float @int_to_float_rm_optsize(i32* %a) optsize { +; SSE2-LABEL: int_to_float_rm_optsize: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: cvtsi2ssl (%rdi), %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: int_to_float_rm_optsize: +; AVX: # BB#0: # %entry +; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 +; AVX-NEXT: retq +; +; SSE2_X86-LABEL: int_to_float_rm_optsize: +; SSE2_X86: # BB#0: # %entry +; SSE2_X86-NEXT: pushl %eax +; SSE2_X86-NEXT: .cfi_def_cfa_offset 8 +; SSE2_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2_X86-NEXT: cvtsi2ssl (%eax), %xmm0 +; SSE2_X86-NEXT: movss %xmm0, (%esp) +; SSE2_X86-NEXT: flds (%esp) +; SSE2_X86-NEXT: popl %eax +; SSE2_X86-NEXT: .cfi_def_cfa_offset 4 +; SSE2_X86-NEXT: retl +; +; AVX_X86-LABEL: int_to_float_rm_optsize: +; AVX_X86: # BB#0: # %entry +; AVX_X86-NEXT: pushl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 8 +; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0 +; AVX_X86-NEXT: vmovss %xmm0, (%esp) +; AVX_X86-NEXT: flds (%esp) +; AVX_X86-NEXT: popl %eax +; AVX_X86-NEXT: .cfi_def_cfa_offset 4 +; AVX_X86-NEXT: retl +entry: + %0 = load i32, i32* %a + %1 = sitofp i32 %0 to float + ret float %1 +}