diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3580,6 +3580,22 @@
     return;
 
   SDValue Res = SDValue();
+
+  auto unrollExpandedOp = [&]() {
+    // We're going to widen this vector op to a legal type by padding with undef
+    // elements. If the wide vector op is eventually going to be expanded to
+    // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
+    // libcalls on the undef elements.
+    EVT VT = N->getValueType(0);
+    EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+    if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+        TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
+      Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+      return true;
+    }
+    return false;
+  };
+
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
@@ -3678,12 +3694,19 @@
     Res = WidenVecRes_Binary(N);
     break;
 
+  case ISD::FREM:
+    if (unrollExpandedOp())
+      break;
+    // If the target has custom/legal support for the scalar FP intrinsic ops
+    // (they are probably not destined to become libcalls), then widen those
+    // like any other binary ops.
+    LLVM_FALLTHROUGH;
+
   case ISD::FADD:
   case ISD::FMUL:
   case ISD::FPOW:
   case ISD::FSUB:
   case ISD::FDIV:
-  case ISD::FREM:
   case ISD::SDIV:
   case ISD::UDIV:
   case ISD::SREM:
@@ -3766,23 +3789,13 @@
   case ISD::FROUNDEVEN:
   case ISD::FSIN:
   case ISD::FSQRT:
-  case ISD::FTRUNC: {
-    // We're going to widen this vector op to a legal type by padding with undef
-    // elements. If the wide vector op is eventually going to be expanded to
-    // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
-    // libcalls on the undef elements.
-    EVT VT = N->getValueType(0);
-    EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
-    if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
-        TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
-      Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+  case ISD::FTRUNC:
+    if (unrollExpandedOp())
       break;
-    }
-  }
-  // If the target has custom/legal support for the scalar FP intrinsic ops
-  // (they are probably not destined to become libcalls), then widen those like
-  // any other unary ops.
-  LLVM_FALLTHROUGH;
+    // If the target has custom/legal support for the scalar FP intrinsic ops
+    // (they are probably not destined to become libcalls), then widen those
+    // like any other unary ops.
+    LLVM_FALLTHROUGH;
 
   case ISD::ABS:
   case ISD::BITREVERSE:
diff --git a/llvm/test/CodeGen/X86/frem-libcall.ll b/llvm/test/CodeGen/X86/frem-libcall.ll
--- a/llvm/test/CodeGen/X86/frem-libcall.ll
+++ b/llvm/test/CodeGen/X86/frem-libcall.ll
@@ -1,49 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s
 
-; FIXME: Ensure vectorized FREMs are not widened/unrolled such that they get lowered
+; Ensure vectorized FREMs are not widened/unrolled such that they get lowered
 ; into libcalls on undef elements.
 
 define float @frem(<2 x float> %a0, <2 x float> %a1, <2 x float> %a2, <2 x float> *%p3) nounwind {
 ; CHECK-LABEL: frem:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    subq $80, %rsp
+; CHECK-NEXT:    subq $64, %rsp
 ; CHECK-NEXT:    movq %rdi, %rbx
 ; CHECK-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
 ; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; CHECK-NEXT:    callq fmodf@PLT
-; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
-; CHECK-NEXT:    callq fmodf@PLT
-; CHECK-NEXT:    unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
 ; CHECK-NEXT:    callq fmodf@PLT
 ; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
 ; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
 ; CHECK-NEXT:    callq fmodf@PLT
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT:    unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
-; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
 ; CHECK-NEXT:    divps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
 ; CHECK-NEXT:    movaps %xmm1, %xmm0
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1]
 ; CHECK-NEXT:    addss %xmm1, %xmm0
 ; CHECK-NEXT:    movlps %xmm1, (%rbx)
-; CHECK-NEXT:    addq $80, %rsp
+; CHECK-NEXT:    addq $64, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    retq
   %frem = frem <2 x float> %a0, %a1
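Illustrative note (not part of the patch): a minimal standalone sketch of the case this change targets is below; the function name, file name, and grep pipeline are made up for illustration. On a plain x86-64 target the widened v4f32 FREM is not legal and scalar FREM expands to the fmodf libcall, so with this change the FREM is unrolled at widening time into two fmodf calls for the defined lanes instead of four (two of which previously operated on undef padding lanes).

; frem-sketch.ll - hypothetical reproducer, assuming a default x86-64 target
define <2 x float> @frem_v2f32(<2 x float> %x, <2 x float> %y) nounwind {
  ; Only two lanes are defined, so only two scalar fmodf calls should be emitted.
  %r = frem <2 x float> %x, %y
  ret <2 x float> %r
}
; Expected (assumption): llc -mtriple=x86_64-linux-gnu frem-sketch.ll -o - | grep -c fmodf  -->  2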