diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4087,7 +4087,8 @@

   case ISD::FLDEXP:
   case ISD::FPOWI:
-    Res = WidenVecRes_ExpOp(N);
+    if (!unrollExpandedOp())
+      Res = WidenVecRes_ExpOp(N);
     break;

   case ISD::ANY_EXTEND_VECTOR_INREG:
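Note for reviewers: unrollExpandedOp is the helper lambda defined near the top of DAGTypeLegalizer::WidenVectorResult, already used for ops such as ISD::FSIN and ISD::FCOS. It checks whether the widened vector op would only be expanded into scalar libcalls, and if so unrolls the original narrow op instead, so the undef padding lanes never turn into libcalls. For reference, it looks roughly like this (a sketch from the surrounding function; the in-tree body may differ in detail):

  auto unrollExpandedOp = [&]() {
    // Widening pads the vector with undef lanes. If the wide op would be
    // expanded to scalar libcalls anyway, unroll the original op now so
    // the padding lanes never become libcalls.
    EVT VT = N->getValueType(0);
    EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
    if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
        TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
      Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
      return true;
    }
    return false;
  };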
diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll
--- a/llvm/test/CodeGen/PowerPC/ldexp.ll
+++ b/llvm/test/CodeGen/PowerPC/ldexp.ll
@@ -52,56 +52,37 @@
 ; CHECK-LABEL: ldexp_v2f32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    mflr r0
-; CHECK-NEXT:    stdu r1, -96(r1)
-; CHECK-NEXT:    std r0, 112(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    stdu r1, -80(r1)
+; CHECK-NEXT:    std r0, 96(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    .cfi_offset v28, -64
 ; CHECK-NEXT:    .cfi_offset v29, -48
 ; CHECK-NEXT:    .cfi_offset v30, -32
 ; CHECK-NEXT:    .cfi_offset v31, -16
-; CHECK-NEXT:    li r3, 12
-; CHECK-NEXT:    xscvspdpn f1, v2
-; CHECK-NEXT:    stxv v28, 32(r1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv v29, 48(r1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv v30, 64(r1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv v31, 80(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    xxsldwi vs0, v2, v2, 3
+; CHECK-NEXT:    li r3, 0
+; CHECK-NEXT:    stxv v29, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    xscvspdpn f1, vs0
+; CHECK-NEXT:    vextuwrx r4, r3, v3
+; CHECK-NEXT:    stxv v30, 48(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv v31, 64(r1) # 16-byte Folded Spill
 ; CHECK-NEXT:    vmr v31, v3
 ; CHECK-NEXT:    vmr v30, v2
-; CHECK-NEXT:    vextuwrx r4, r3, v3
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    xxswapd vs0, v30
 ; CHECK-NEXT:    li r3, 4
-; CHECK-NEXT:    xscpsgndp v29, f1, f1
+; CHECK-NEXT:    xscvdpspn v29, f1
 ; CHECK-NEXT:    xscvspdpn f1, vs0
 ; CHECK-NEXT:    vextuwrx r4, r3, v31
 ; CHECK-NEXT:    bl ldexpf
 ; CHECK-NEXT:    nop
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; CHECK-NEXT:    xxmrghd vs0, v29, vs1
-; CHECK-NEXT:    li r3, 0
-; CHECK-NEXT:    vextuwrx r4, r3, v31
-; CHECK-NEXT:    xvcvdpsp v28, vs0
-; CHECK-NEXT:    xxsldwi vs0, v30, v30, 3
-; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    bl ldexpf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    xxsldwi vs0, v30, v30, 1
-; CHECK-NEXT:    xscpsgndp v29, f1, f1
-; CHECK-NEXT:    mfvsrwz r4, v31
-; CHECK-NEXT:    xscvspdpn f1, vs0
-; CHECK-NEXT:    bl ldexpf
-; CHECK-NEXT:    nop
-; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
-; CHECK-NEXT:    xxmrghd vs0, vs1, v29
-; CHECK-NEXT:    lxv v31, 80(r1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv v30, 64(r1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv v29, 48(r1) # 16-byte Folded Reload
-; CHECK-NEXT:    xvcvdpsp v2, vs0
-; CHECK-NEXT:    vmrgew v2, v28, v2
-; CHECK-NEXT:    lxv v28, 32(r1) # 16-byte Folded Reload
-; CHECK-NEXT:    addi r1, r1, 96
+; CHECK-NEXT:    xscvdpspn vs0, f1
+; CHECK-NEXT:    lxv v31, 64(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv v30, 48(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    xxmrghw v2, vs0, v29
+; CHECK-NEXT:    lxv v29, 32(r1) # 16-byte Folded Reload
+; CHECK-NEXT:    addi r1, r1, 80
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
diff --git a/llvm/test/CodeGen/X86/ldexp.ll b/llvm/test/CodeGen/X86/ldexp.ll
--- a/llvm/test/CodeGen/X86/ldexp.ll
+++ b/llvm/test/CodeGen/X86/ldexp.ll
@@ -106,30 +106,14 @@
 define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
 ; X64-LABEL: ldexp_v2f32:
 ; X64:       # %bb.0:
-; X64-NEXT:    subq $72, %rsp
-; X64-NEXT:    .cfi_def_cfa_offset 80
+; X64-NEXT:    subq $56, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 64
 ; X64-NEXT:    movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[3,3,3,3]
-; X64-NEXT:    movd %xmm2, %edi
-; X64-NEXT:    callq ldexpf@PLT
 ; X64-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; X64-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; X64-NEXT:    pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
-; X64-NEXT:    # xmm1 = mem[2,3,2,3]
 ; X64-NEXT:    movd %xmm1, %edi
 ; X64-NEXT:    callq ldexpf@PLT
-; X64-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
-; X64-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
-; X64-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; X64-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; X64-NEXT:    movd %xmm0, %edi
-; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; X64-NEXT:    callq ldexpf@PLT
 ; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; X64-NEXT:    pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
 ; X64-NEXT:    # xmm1 = mem[1,1,1,1]
@@ -137,10 +121,8 @@
 ; X64-NEXT:    callq ldexpf@PLT
 ; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
 ; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X64-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
-; X64-NEXT:    # xmm1 = xmm1[0],mem[0]
 ; X64-NEXT:    movaps %xmm1, %xmm0
-; X64-NEXT:    addq $72, %rsp
+; X64-NEXT:    addq $56, %rsp
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
 ;
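Note for reviewers: the net effect on the <2 x float> ldexp tests above is that the widened <4 x float> path, which emitted four scalar libcalls (the deleted bl ldexpf / callq ldexpf@PLT lines include two calls made on undef padding lanes), becomes an unrolled two-call sequence with a correspondingly smaller stack frame. The IR under test is essentially the following; the intrinsic mangling is my reconstruction, since the diff context shows the define line but not the call itself:

  declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>)

  define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) {
    %r = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %val, <2 x i32> %exp)
    ret <2 x float> %r
  }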
diff --git a/llvm/test/CodeGen/X86/powi.ll b/llvm/test/CodeGen/X86/powi.ll
--- a/llvm/test/CodeGen/X86/powi.ll
+++ b/llvm/test/CodeGen/X86/powi.ll
@@ -154,7 +154,101 @@
   ret double %ret
 }

+define <2 x float> @powi_v2f32(<2 x float> %a) minsize {
+; X86-X87-LABEL: powi_v2f32:
+; X86-X87:       # %bb.0:
+; X86-X87-NEXT:    pushl %esi
+; X86-X87-NEXT:    .cfi_def_cfa_offset 8
+; X86-X87-NEXT:    subl $16, %esp
+; X86-X87-NEXT:    .cfi_def_cfa_offset 24
+; X86-X87-NEXT:    .cfi_offset %esi, -8
+; X86-X87-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-X87-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-X87-NEXT:    pushl $15
+; X86-X87-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-X87-NEXT:    popl %esi
+; X86-X87-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-X87-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-X87-NEXT:    fstps (%esp)
+; X86-X87-NEXT:    calll __powisf2
+; X86-X87-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-X87-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-X87-NEXT:    fstps (%esp)
+; X86-X87-NEXT:    calll __powisf2
+; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-X87-NEXT:    fxch %st(1)
+; X86-X87-NEXT:    addl $16, %esp
+; X86-X87-NEXT:    .cfi_def_cfa_offset 8
+; X86-X87-NEXT:    popl %esi
+; X86-X87-NEXT:    .cfi_def_cfa_offset 4
+; X86-X87-NEXT:    retl
+;
+; X86-SSE-LABEL: powi_v2f32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    subl $32, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 40
+; X86-SSE-NEXT:    .cfi_offset %esi, -8
+; X86-SSE-NEXT:    movups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
+; X86-SSE-NEXT:    pushl $15
+; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
+; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    calll __powisf2
+; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    calll __powisf2
+; X86-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
+; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT:    addl $32, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    popl %esi
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; X64-LABEL: powi_v2f32:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    subq $32, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 48
+; X64-NEXT:    .cfi_offset %rbx, -16
+; X64-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; X64-NEXT:    pushq $15
+; X64-NEXT:    .cfi_adjust_cfa_offset 8
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_adjust_cfa_offset -8
+; X64-NEXT:    movl %ebx, %edi
+; X64-NEXT:    callq __powisf2@PLT
+; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
+; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X64-NEXT:    movl %ebx, %edi
+; X64-NEXT:    callq __powisf2@PLT
+; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    addq $32, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 16
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %ret = tail call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 15) nounwind
+  ret <2 x float> %ret
+}
+
 declare double @llvm.powi.f64.i32(double, i32) nounwind readonly
+declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) nounwind readonly

 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"ProfileSummary", !1}
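Note for reviewers: the new test carries minsize because SelectionDAG normally expands llvm.powi with a small constant exponent into an inline multiply sequence; under minsize it stays a per-lane libcall, which is exactly the path this patch changes (two __powisf2 calls instead of the four the widened <4 x float> form would emit). The CHECK lines are generated output; if codegen shifts again, regenerate them with llvm/utils/update_llc_test_checks.py rather than editing by hand.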