diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -0,0 +1,919 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK-SSE2,CHECK-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK-SSE2,CHECK-AVX2,CHECK-AVX512F
+; REVIEW: functions below that only carry SSE-prefixed and AVX512F-prefixed check blocks have
+; no assertions for the -mattr=+avx2 invocation (both of its prefixes are shared); confirm intended.
+
+declare i16 @llvm.umax.i16(i16, i16)
+declare i64 @llvm.umin.i64(i64, i64)
+
+; fmul of constant 9.0 by uitofp(1 << %cnt), i64 to double.
+define double @fmul_pow_shl_cnt(i64 %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movq %rdi, %rcx
+; CHECK-SSE-NEXT:    movl $1, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE-NEXT:    shlq %cl, %rax
+; CHECK-SSE-NEXT:    movq %rax, %xmm1
+; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK-SSE-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-SSE-NEXT:    addsd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fmul_pow_shl_cnt:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movq %rdi, %rcx
+; CHECK-AVX512F-NEXT:    movl $1, %eax
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-AVX512F-NEXT:    shlq %cl, %rax
+; CHECK-AVX512F-NEXT:    vcvtusi2sd %rax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nuw i64 1, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double 9.000000e+00, %conv
+  ret double %mul
+}
+
+; Same shape with shift base 2 and a negative multiplier: -9.0 * uitofp(2 << %cnt).
+define double @fmul_pow_shl_cnt2(i64 %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt2:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movq %rdi, %rcx
+; CHECK-SSE-NEXT:    movl $2, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE-NEXT:    shlq %cl, %rax
+; CHECK-SSE-NEXT:    movq %rax, %xmm1
+; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK-SSE-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-SSE-NEXT:    addsd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fmul_pow_shl_cnt2:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movq %rdi, %rcx
+; CHECK-AVX512F-NEXT:    movl $2, %eax
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-AVX512F-NEXT:    shlq %cl, %rax
+; CHECK-AVX512F-NEXT:    vcvtusi2sd %rax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nuw i64 2, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double -9.000000e+00, %conv
+  ret double %mul
+}
+
+; Integer operand is a select between (1 << %cnt) and (2 << %cnt), i32 to float.
+define float @fmul_pow_select(i32 %cnt, i1 %c) {
+; CHECK-SSE-LABEL: fmul_pow_select:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movl %edi, %ecx
+; CHECK-SSE-NEXT:    andl $1, %esi
+; CHECK-SSE-NEXT:    movl $2, %eax
+; CHECK-SSE-NEXT:    subl %esi, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-SSE-NEXT:    shll %cl, %eax
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm0
+; CHECK-SSE-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fmul_pow_select:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movl %edi, %ecx
+; CHECK-AVX512F-NEXT:    andl $1, %esi
+; CHECK-AVX512F-NEXT:    movl $2, %eax
+; CHECK-AVX512F-NEXT:    subl %esi, %eax
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-AVX512F-NEXT:    shll %cl, %eax
+; CHECK-AVX512F-NEXT:    vcvtusi2ss %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl2 = shl nuw i32 2, %cnt
+  %shl1 = shl nuw i32 1, %cnt
+  %shl = select i1 %c, i32 %shl1, i32 %shl2
+  %conv = uitofp i32 %shl to float
+  %mul = fmul float 9.000000e+00, %conv
+  ret float %mul
+}
+
+; Integer operand is umin(8 << %cnt, 8192), i64 to float.
+define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) {
+; CHECK-SSE-LABEL: fmul_fly_pow_mul_min_pow2:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movq %rdi, %rcx
+; CHECK-SSE-NEXT:    movl $8, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE-NEXT:    shlq %cl, %rax
+; CHECK-SSE-NEXT:    cmpq $8192, %rax # imm = 0x2000
+; CHECK-SSE-NEXT:    movl $8192, %ecx # imm = 0x2000
+; CHECK-SSE-NEXT:    cmovbq %rax, %rcx
+; CHECK-SSE-NEXT:    cvtsi2ss %rcx, %xmm0
+; CHECK-SSE-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-SSE2-LABEL: fmul_fly_pow_mul_min_pow2:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movq %rdi, %rcx
+; CHECK-SSE2-NEXT:    movl $8, %eax
+; CHECK-SSE2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE2-NEXT:    shlq %cl, %rax
+; CHECK-SSE2-NEXT:    cmpq $8192, %rax # imm = 0x2000
+; CHECK-SSE2-NEXT:    movl $8192, %ecx # imm = 0x2000
+; CHECK-SSE2-NEXT:    cmovbq %rax, %rcx
+; CHECK-SSE2-NEXT:    vcvtsi2ss %rcx, %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shl8 = shl nuw i64 8, %cnt
+  %shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192)
+  %conv = uitofp i64 %shl to float
+  %mul = fmul float 9.000000e+00, %conv
+  ret float %mul
+}
+
+; Integer operand is umax(1 << %cnt, 2 << %cnt) computed in i16, converted to double.
+define double @fmul_pow_mul_max_pow2(i16 %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_mul_max_pow2:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movl %edi, %ecx
+; CHECK-SSE-NEXT:    movl $2, %eax
+; CHECK-SSE-NEXT:    shll %cl, %eax
+; CHECK-SSE-NEXT:    movl $1, %edx
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-SSE-NEXT:    shll %cl, %edx
+; CHECK-SSE-NEXT:    cmpw %ax, %dx
+; CHECK-SSE-NEXT:    cmovbel %eax, %edx
+; CHECK-SSE-NEXT:    movzwl %dx, %eax
+; CHECK-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; CHECK-SSE-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-SSE2-LABEL: fmul_pow_mul_max_pow2:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movl %edi, %ecx
+; CHECK-SSE2-NEXT:    movl $2, %eax
+; CHECK-SSE2-NEXT:    shll %cl, %eax
+; CHECK-SSE2-NEXT:    movl $1, %edx
+; CHECK-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-SSE2-NEXT:    shll %cl, %edx
+; CHECK-SSE2-NEXT:    cmpw %ax, %dx
+; CHECK-SSE2-NEXT:    cmovbel %eax, %edx
+; CHECK-SSE2-NEXT:    movzwl %dx, %eax
+; CHECK-SSE2-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shl2 = shl nuw i16 2, %cnt
+  %shl1 = shl nuw i16 1, %cnt
+  %shl = call i16 @llvm.umax.i16(i16 %shl1, i16 %shl2)
+  %conv = uitofp i16 %shl to double
+  %mul = fmul double 3.000000e+00, %conv
+  ret double %mul
+}
+
+; Shifted base is the argument %v rather than a constant, so it need not be a power of two.
+define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movq %rsi, %rcx
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE-NEXT:    shlq %cl, %rdi
+; CHECK-SSE-NEXT:    movq %rdi, %xmm1
+; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK-SSE-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-SSE-NEXT:    addsd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movq %rsi, %rcx
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-AVX512F-NEXT:    shlq %cl, %rdi
+; CHECK-AVX512F-NEXT:    vcvtusi2sd %rdi, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nuw i64 %v, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double 9.000000e+00, %conv
+  ret double %mul
+}
+
+; <2 x i64> to <2 x float>; the SSE checks show a branchy per-element unsigned conversion.
+define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
+; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [2,2]
+; CHECK-SSE-NEXT:    movdqa %xmm3, %xmm1
+; CHECK-SSE-NEXT:    psllq %xmm2, %xmm1
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm3
+; CHECK-SSE-NEXT:    movq %xmm3, %rax
+; CHECK-SSE-NEXT:    testq %rax, %rax
+; CHECK-SSE-NEXT:    js .LBB6_1
+; CHECK-SSE-NEXT:  # %bb.2:
+; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm0
+; CHECK-SSE-NEXT:    jmp .LBB6_3
+; CHECK-SSE-NEXT:  .LBB6_1:
+; CHECK-SSE-NEXT:    movq %rax, %rcx
+; CHECK-SSE-NEXT:    shrq %rcx
+; CHECK-SSE-NEXT:    andl $1, %eax
+; CHECK-SSE-NEXT:    orq %rcx, %rax
+; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm0
+; CHECK-SSE-NEXT:    addss %xmm0, %xmm0
+; CHECK-SSE-NEXT:  .LBB6_3:
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; CHECK-SSE-NEXT:    movq %xmm1, %rax
+; CHECK-SSE-NEXT:    testq %rax, %rax
+; CHECK-SSE-NEXT:    js .LBB6_4
+; CHECK-SSE-NEXT:  # %bb.5:
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm1
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK-SSE-NEXT:    jmp .LBB6_6
+; CHECK-SSE-NEXT:  .LBB6_4:
+; CHECK-SSE-NEXT:    movq %rax, %rcx
+; CHECK-SSE-NEXT:    shrq %rcx
+; CHECK-SSE-NEXT:    andl $1, %eax
+; CHECK-SSE-NEXT:    orq %rcx, %rax
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm1
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK-SSE-NEXT:    addss %xmm1, %xmm1
+; CHECK-SSE-NEXT:  .LBB6_6:
+; CHECK-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-SSE-NEXT:    mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
+; CHECK-AVX512F-NEXT:    vpsllvq %xmm0, %xmm1, %xmm0
+; CHECK-AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
+; CHECK-AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
+; CHECK-AVX512F-NEXT:    vmovq %xmm0, %rax
+; CHECK-AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
+; CHECK-AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; CHECK-AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
+; CHECK-AVX512F-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x float>
+  %mul = fmul <2 x float> <float 15.000000e+00, float 15.000000e+00>, %conv
+  ret <2 x float> %mul
+}
+
+; Vector form, <2 x i64> to <2 x double>, with a splat shift base of 2.
+; NOTE(review): multiplier is only checked via a constant-pool load; <15.0, 15.0> taken from upstream - verify.
+define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2,2]
+; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm2
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm2
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm1
+; CHECK-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; CHECK-SSE-NEXT:    movapd {{.*#+}} xmm0 = [4294967295,4294967295]
+; CHECK-SSE-NEXT:    andpd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    orpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    psrlq $32, %xmm1
+; CHECK-SSE-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    addpd %xmm0, %xmm1
+; CHECK-SSE-NEXT:    mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-SSE2-LABEL: fmul_pow_shl_cnt_vec:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
+; CHECK-SSE2-NEXT:    vpsllvq %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; CHECK-SSE2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+; Splat shift base of 2 with a non-splat multiplier.
+; NOTE(review): multiplier is only checked via a constant-pool load; <15.0, 14.0> taken from upstream - verify.
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2,2]
+; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm2
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm2
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm1
+; CHECK-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; CHECK-SSE-NEXT:    movapd {{.*#+}} xmm0 = [4294967295,4294967295]
+; CHECK-SSE-NEXT:    andpd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    orpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    psrlq $32, %xmm1
+; CHECK-SSE-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    addpd %xmm0, %xmm1
+; CHECK-SSE-NEXT:    mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-SSE2-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
+; CHECK-SSE2-NEXT:    vpsllvq %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; CHECK-SSE2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fmul <2 x double> <double 15.000000e+00, double 14.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+; Non-splat shift base <2, 1>.
+; NOTE(review): multiplier is only checked via a constant-pool load; <15.0, 15.0> taken from upstream - verify.
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2,1]
+; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm2
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm2
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm1
+; CHECK-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; CHECK-SSE-NEXT:    movapd {{.*#+}} xmm0 = [4294967295,4294967295]
+; CHECK-SSE-NEXT:    andpd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    orpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    psrlq $32, %xmm1
+; CHECK-SSE-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    addpd %xmm0, %xmm1
+; CHECK-SSE-NEXT:    mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-SSE2-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    vmovdqa {{.*#+}} xmm1 = [2,1]
+; CHECK-SSE2-NEXT:    vpsllvq %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; CHECK-SSE2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 1>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+; <2 x i16> shift converted to <2 x half>, multiplied by splat 15.0.
+define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    subq $40, %rsp
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; CHECK-SSE-NEXT:    pslld $23, %xmm0
+; CHECK-SSE-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    cvttps2dq %xmm0, %xmm0
+; CHECK-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; CHECK-SSE-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-SSE-NEXT:    pextrw $1, %xmm0, %eax
+; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
+; CHECK-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-SSE-NEXT:    pextrw $0, %xmm0, %eax
+; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
+; CHECK-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    callq __extendhfsf2@PLT
+; CHECK-SSE-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-SSE-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-NEXT:    callq __extendhfsf2@PLT
+; CHECK-SSE-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; CHECK-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-SSE-NEXT:    addq $40, %rsp
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fmul_pow_shl_cnt_vec_fail_to_large:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; CHECK-AVX512F-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,0,0,0,0,0,0]
+; CHECK-AVX512F-NEXT:    vpsllvd %ymm0, %ymm1, %ymm0
+; CHECK-AVX512F-NEXT:    vpmovdw %zmm0, %ymm1
+; CHECK-AVX512F-NEXT:    vpextrw $0, %xmm0, %eax
+; CHECK-AVX512F-NEXT:    vcvtsi2ss %eax, %xmm2, %xmm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovd %xmm0, %eax
+; CHECK-AVX512F-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vpextrw $1, %xmm1, %eax
+; CHECK-AVX512F-NEXT:    vcvtsi2ss %eax, %xmm2, %xmm1
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
+; CHECK-AVX512F-NEXT:    vmovd %xmm1, %eax
+; CHECK-AVX512F-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm1
+; CHECK-AVX512F-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-AVX512F-NEXT:    vmovaps {{.*#+}} xmm1 = [16,0,0,0]
+; CHECK-AVX512F-NEXT:    xorl %eax, %eax
+; CHECK-AVX512F-NEXT:    vcvtsi2ss %eax, %xmm2, %xmm2
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
+; CHECK-AVX512F-NEXT:    vmovd %xmm2, %eax
+; CHECK-AVX512F-NEXT:    vmovd %eax, %xmm2
+; CHECK-AVX512F-NEXT:    vpbroadcastw %xmm2, %xmm2
+; CHECK-AVX512F-NEXT:    vpermt2ps %zmm0, %zmm1, %zmm2
+; CHECK-AVX512F-NEXT:    vcvtph2ps %xmm2, %ymm0
+; CHECK-AVX512F-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1,1.5E+1]
+; CHECK-AVX512F-NEXT:    vmulps %ymm1, %ymm0, %ymm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-AVX512F-NEXT:    vzeroupper
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nsw nuw <2 x i16> <i16 2, i16 2>, %cnt
+  %conv = uitofp <2 x i16> %shl to <2 x half>
+  %mul = fmul <2 x half> <half 15.000000e+00, half 15.000000e+00>, %conv
+  ret <2 x half> %mul
+}
+
+; Large multiplier (9.745314e+288) combined with a shift performed in i64.
+define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movq %rdi, %rcx
+; CHECK-SSE-NEXT:    movl $1, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE-NEXT:    shlq %cl, %rax
+; CHECK-SSE-NEXT:    movq %rax, %xmm1
+; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
+; CHECK-SSE-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    movapd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-SSE-NEXT:    addsd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movq %rdi, %rcx
+; CHECK-AVX512F-NEXT:    movl $1, %eax
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-AVX512F-NEXT:    shlq %cl, %rax
+; CHECK-AVX512F-NEXT:    vcvtusi2sd %rax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nuw i64 1, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double 9.745314e+288, %conv
+  ret double %mul
+}
+
+; Same large multiplier, but the shift is performed in i16.
+define double @fmul_pow_shl_cnt_safe(i16 %cnt) {
+; CHECK-SSE-LABEL: fmul_pow_shl_cnt_safe:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movl %edi, %ecx
+; CHECK-SSE-NEXT:    movl $1, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-SSE-NEXT:    shll %cl, %eax
+; CHECK-SSE-NEXT:    movzwl %ax, %eax
+; CHECK-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; CHECK-SSE-NEXT:    mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-SSE2-LABEL: fmul_pow_shl_cnt_safe:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movl %edi, %ecx
+; CHECK-SSE2-NEXT:    movl $1, %eax
+; CHECK-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-SSE2-NEXT:    shll %cl, %eax
+; CHECK-SSE2-NEXT:    movzwl %ax, %eax
+; CHECK-SSE2-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shl = shl nuw i16 1, %cnt
+  %conv = uitofp i16 %shl to double
+  %mul = fmul double 9.745314e+288, %conv
+  ret double %mul
+}
+
+; fdiv of splat 1.0 by uitofp(1 << %cnt), <2 x i64> to <2 x double>.
+define <2 x double> @fdiv_pow_shl_cnt_vec(<2 x i64> %cnt) {
+; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_vec:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,1]
+; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm2
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm2
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm1
+; CHECK-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; CHECK-SSE-NEXT:    movapd {{.*#+}} xmm0 = [4294967295,4294967295]
+; CHECK-SSE-NEXT:    andpd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    orpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-SSE-NEXT:    psrlq $32, %xmm1
+; CHECK-SSE-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE-NEXT:    addpd %xmm0, %xmm1
+; CHECK-SSE-NEXT:    movapd {{.*#+}} xmm0 = [1.0E+0,1.0E+0]
+; CHECK-SSE-NEXT:    divpd %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-SSE2-LABEL: fdiv_pow_shl_cnt_vec:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; CHECK-SSE2-NEXT:    vpsllvq %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; CHECK-SSE2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
+; CHECK-SSE2-NEXT:    vpsrlq $32, %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
+; CHECK-SSE2-NEXT:    # xmm1 = mem[0,0]
+; CHECK-SSE2-NEXT:    vdivpd %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+; fdiv of splat 1.0 by uitofp(1 << %cnt), <2 x i64> to <2 x float>.
+define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) {
+; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1,1]
+; CHECK-SSE-NEXT:    movdqa %xmm3, %xmm2
+; CHECK-SSE-NEXT:    psllq %xmm1, %xmm2
+; CHECK-SSE-NEXT:    psllq %xmm0, %xmm3
+; CHECK-SSE-NEXT:    movq %xmm3, %rax
+; CHECK-SSE-NEXT:    testq %rax, %rax
+; CHECK-SSE-NEXT:    js .LBB14_1
+; CHECK-SSE-NEXT:  # %bb.2:
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm1
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK-SSE-NEXT:    jmp .LBB14_3
+; CHECK-SSE-NEXT:  .LBB14_1:
+; CHECK-SSE-NEXT:    movq %rax, %rcx
+; CHECK-SSE-NEXT:    shrq %rcx
+; CHECK-SSE-NEXT:    andl $1, %eax
+; CHECK-SSE-NEXT:    orq %rcx, %rax
+; CHECK-SSE-NEXT:    xorps %xmm1, %xmm1
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK-SSE-NEXT:    addss %xmm1, %xmm1
+; CHECK-SSE-NEXT:  .LBB14_3:
+; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3]
+; CHECK-SSE-NEXT:    movq %xmm0, %rax
+; CHECK-SSE-NEXT:    testq %rax, %rax
+; CHECK-SSE-NEXT:    js .LBB14_4
+; CHECK-SSE-NEXT:  # %bb.5:
+; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm0
+; CHECK-SSE-NEXT:    jmp .LBB14_6
+; CHECK-SSE-NEXT:  .LBB14_4:
+; CHECK-SSE-NEXT:    movq %rax, %rcx
+; CHECK-SSE-NEXT:    shrq %rcx
+; CHECK-SSE-NEXT:    andl $1, %eax
+; CHECK-SSE-NEXT:    orq %rcx, %rax
+; CHECK-SSE-NEXT:    xorps %xmm0, %xmm0
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm0
+; CHECK-SSE-NEXT:    addss %xmm0, %xmm0
+; CHECK-SSE-NEXT:  .LBB14_6:
+; CHECK-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-SSE-NEXT:    movaps {{.*#+}} xmm0 = <1.0E+0,1.0E+0,u,u>
+; CHECK-SSE-NEXT:    divps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [1,1]
+; CHECK-AVX512F-NEXT:    vpsllvq %xmm0, %xmm1, %xmm0
+; CHECK-AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
+; CHECK-AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm1
+; CHECK-AVX512F-NEXT:    vmovq %xmm0, %rax
+; CHECK-AVX512F-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
+; CHECK-AVX512F-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; CHECK-AVX512F-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; CHECK-AVX512F-NEXT:    vdivps %xmm0, %xmm1, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x float>
+  %mul = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %conv
+  ret <2 x float> %mul
+}
+
+; The shl carries no nuw/nsw flags here; the result is converted with uitofp.
+define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) {
+; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movq %rdi, %rcx
+; CHECK-SSE-NEXT:    movl $8, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE-NEXT:    shlq %cl, %rax
+; CHECK-SSE-NEXT:    testq %rax, %rax
+; CHECK-SSE-NEXT:    js .LBB15_1
+; CHECK-SSE-NEXT:  # %bb.2:
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK-SSE-NEXT:    jmp .LBB15_3
+; CHECK-SSE-NEXT:  .LBB15_1:
+; CHECK-SSE-NEXT:    shrq %rax
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK-SSE-NEXT:    addss %xmm1, %xmm1
+; CHECK-SSE-NEXT:  .LBB15_3:
+; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-NEXT:    divss %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movq %rdi, %rcx
+; CHECK-AVX512F-NEXT:    movl $8, %eax
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-AVX512F-NEXT:    shlq %cl, %rax
+; CHECK-AVX512F-NEXT:    vcvtusi2ss %rax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-AVX512F-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl i64 8, %cnt
+  %conv = uitofp i64 %shl to float
+  %mul = fdiv float -9.000000e+00, %conv
+  ret float %mul
+}
+
+; Unflagged shl whose result is converted with sitofp (signed) instead of uitofp.
+define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) {
+; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movq %rdi, %rcx
+; CHECK-SSE-NEXT:    movl $8, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE-NEXT:    shlq %cl, %rax
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-NEXT:    divss %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-SSE2-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movq %rdi, %rcx
+; CHECK-SSE2-NEXT:    movl $8, %eax
+; CHECK-SSE2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE2-NEXT:    shlq %cl, %rax
+; CHECK-SSE2-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-SSE2-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %shl = shl i64 8, %cnt
+  %conv = sitofp i64 %shl to float
+  %mul = fdiv float -9.000000e+00, %conv
+  ret float %mul
+}
+
+; Shift count is masked with 31 before the unflagged shl; the result is converted with sitofp.
+define float @fdiv_pow_shl_cnt(i64 %cnt_in) {
+; CHECK-SSE-LABEL: fdiv_pow_shl_cnt:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    movq %rdi, %rcx
+; CHECK-SSE-NEXT:    andb $31, %cl
+; CHECK-SSE-NEXT:    movl $8, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE-NEXT:    shlq %cl, %rax
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm1
+; CHECK-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-NEXT:    divss %xmm1, %xmm0
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-SSE2-LABEL: fdiv_pow_shl_cnt:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movq %rdi, %rcx
+; CHECK-SSE2-NEXT:    andb $31, %cl
+; CHECK-SSE2-NEXT:    movl $8, %eax
+; CHECK-SSE2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; CHECK-SSE2-NEXT:    shlq %cl, %rax
+; CHECK-SSE2-NEXT:    vcvtsi2ss %rax, %xmm0, %xmm0
+; CHECK-SSE2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-SSE2-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    retq
+  %cnt = and i64 %cnt_in, 31
+  %shl = shl i64 8, %cnt
+  %conv = sitofp i64 %shl to float
+  %mul = fdiv float -0.500000e+00, %conv
+  ret float %mul
+}
+
+; half result from a shift performed in i32; dividend is half 0xH7000.
+define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) {
+; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    pushq %rax
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SSE-NEXT:    movl %edi, %ecx
+; CHECK-SSE-NEXT:    movl $1, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-SSE-NEXT:    shll %cl, %eax
+; CHECK-SSE-NEXT:    cvtsi2ss %rax, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    callq __extendhfsf2@PLT
+; CHECK-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-SSE-NEXT:    divss %xmm0, %xmm1
+; CHECK-SSE-NEXT:    movaps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    popq %rax
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fdiv_pow_shl_cnt_fail_out_of_bounds:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movl %edi, %ecx
+; CHECK-AVX512F-NEXT:    movl $1, %eax
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-AVX512F-NEXT:    shll %cl, %eax
+; CHECK-AVX512F-NEXT:    vcvtusi2ss %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; CHECK-AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-AVX512F-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovd %xmm0, %eax
+; CHECK-AVX512F-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nuw i32 1, %cnt
+  %conv = uitofp i32 %shl to half
+  %mul = fdiv half 0xH7000, %conv
+  ret half %mul
+}
+
+; half result from a shift performed in i16; dividend is half 0xH7000.
+define half @fdiv_pow_shl_cnt_in_bounds(i16 %cnt) {
+; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_in_bounds:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    pushq %rax
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SSE-NEXT:    movl %edi, %ecx
+; CHECK-SSE-NEXT:    movl $1, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-SSE-NEXT:    shll %cl, %eax
+; CHECK-SSE-NEXT:    movzwl %ax, %eax
+; CHECK-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    callq __extendhfsf2@PLT
+; CHECK-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-SSE-NEXT:    divss %xmm0, %xmm1
+; CHECK-SSE-NEXT:    movaps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    popq %rax
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fdiv_pow_shl_cnt_in_bounds:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movl %edi, %ecx
+; CHECK-AVX512F-NEXT:    movl $1, %eax
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-AVX512F-NEXT:    shll %cl, %eax
+; CHECK-AVX512F-NEXT:    movzwl %ax, %eax
+; CHECK-AVX512F-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; CHECK-AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-AVX512F-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovd %xmm0, %eax
+; CHECK-AVX512F-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nuw i16 1, %cnt
+  %conv = uitofp i16 %shl to half
+  %mul = fdiv half 0xH7000, %conv
+  ret half %mul
+}
+
+; half result from a shift performed in i16; dividend is half 0xH4800.
+define half @fdiv_pow_shl_cnt_in_bounds2(i16 %cnt) {
+; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_in_bounds2:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    pushq %rax
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SSE-NEXT:    movl %edi, %ecx
+; CHECK-SSE-NEXT:    movl $1, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-SSE-NEXT:    shll %cl, %eax
+; CHECK-SSE-NEXT:    movzwl %ax, %eax
+; CHECK-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    callq __extendhfsf2@PLT
+; CHECK-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-SSE-NEXT:    divss %xmm0, %xmm1
+; CHECK-SSE-NEXT:    movaps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    popq %rax
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fdiv_pow_shl_cnt_in_bounds2:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movl %edi, %ecx
+; CHECK-AVX512F-NEXT:    movl $1, %eax
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-AVX512F-NEXT:    shll %cl, %eax
+; CHECK-AVX512F-NEXT:    movzwl %ax, %eax
+; CHECK-AVX512F-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; CHECK-AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-AVX512F-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovd %xmm0, %eax
+; CHECK-AVX512F-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nuw i16 1, %cnt
+  %conv = uitofp i16 %shl to half
+  %mul = fdiv half 0xH4800, %conv
+  ret half %mul
+}
+
+; half result from a shift performed in i16; dividend is half 0xH4000.
+define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) {
+; CHECK-SSE-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
+; CHECK-SSE:       # %bb.0:
+; CHECK-SSE-NEXT:    pushq %rax
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SSE-NEXT:    movl %edi, %ecx
+; CHECK-SSE-NEXT:    movl $1, %eax
+; CHECK-SSE-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-SSE-NEXT:    shll %cl, %eax
+; CHECK-SSE-NEXT:    movzwl %ax, %eax
+; CHECK-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    callq __extendhfsf2@PLT
+; CHECK-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-SSE-NEXT:    divss %xmm0, %xmm1
+; CHECK-SSE-NEXT:    movaps %xmm1, %xmm0
+; CHECK-SSE-NEXT:    callq __truncsfhf2@PLT
+; CHECK-SSE-NEXT:    popq %rax
+; CHECK-SSE-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-SSE-NEXT:    retq
+;
+; CHECK-AVX512F-LABEL: fdiv_pow_shl_cnt_fail_out_of_bound2:
+; CHECK-AVX512F:       # %bb.0:
+; CHECK-AVX512F-NEXT:    movl %edi, %ecx
+; CHECK-AVX512F-NEXT:    movl $1, %eax
+; CHECK-AVX512F-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-AVX512F-NEXT:    shll %cl, %eax
+; CHECK-AVX512F-NEXT:    movzwl %ax, %eax
+; CHECK-AVX512F-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; CHECK-AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-AVX512F-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; CHECK-AVX512F-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    vmovd %xmm0, %eax
+; CHECK-AVX512F-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX512F-NEXT:    retq
+  %shl = shl nuw i16 1, %cnt
+  %conv = uitofp i16 %shl to half
+  %mul = fdiv half 0xH4000, %conv
+  ret half %mul
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-AVX2: {{.*}}