Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3678,12 +3678,29 @@ Res = WidenVecRes_Binary(N); break; + case ISD::FREM: { + // We're going to widen this vector op to a legal type by padding with undef + // elements. If the wide vector op is eventually going to be expanded to + // scalar libcalls, then unroll into scalar ops now to avoid unnecessary + // libcalls on the undef elements. + EVT VT = N->getValueType(0); + EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) && + TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { + Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + break; + } + // If the target has custom/legal support for the scalar FP intrinsic ops + // (they are probably not destined to become libcalls), then widen those like + // any other binary ops. + } + LLVM_FALLTHROUGH; + case ISD::FADD: case ISD::FMUL: case ISD::FPOW: case ISD::FSUB: case ISD::FDIV: - case ISD::FREM: case ISD::SDIV: case ISD::UDIV: case ISD::SREM: Index: llvm/test/CodeGen/X86/frem-libcall.ll =================================================================== --- llvm/test/CodeGen/X86/frem-libcall.ll +++ llvm/test/CodeGen/X86/frem-libcall.ll @@ -8,42 +8,26 @@ ; CHECK-LABEL: frem: ; CHECK: # %bb.0: ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $80, %rsp +; CHECK-NEXT: subq $64, %rsp ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3,3,3] -; CHECK-NEXT: callq fmodf@PLT -; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload -; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] -; CHECK-NEXT: callq fmodf@PLT -; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload -; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: callq fmodf@PLT ; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] ; CHECK-NEXT: callq fmodf@PLT ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload -; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] ; CHECK-NEXT: divps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[1,1] ; CHECK-NEXT: addss %xmm1, %xmm0 ; CHECK-NEXT: movlps %xmm1, (%rbx) -; CHECK-NEXT: addq $80, %rsp +; CHECK-NEXT: addq $64, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: retq %frem = frem <2 x float> %a0, %a1