diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53826,7 +53826,28 @@ return true; } } - + // Lookup if there is a negated constant vector in DAG. + if (V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse() && + ISD::isBuildVectorOfConstantFPSDNodes(V.getNode())) { + SmallVector Ops; + EVT VT = V.getValueType(); + EVT EltVT = VT.getVectorElementType(); + for (auto op : V->op_values()) { + if (auto *Cst = dyn_cast(op)) { + Ops.push_back( + DAG.getConstantFP(-Cst->getValueAPF(), SDLoc(op), EltVT)); + } else { + assert(op.isUndef()); + Ops.push_back(DAG.getUNDEF(EltVT)); + } + } + SDNode *NV = + DAG.getNodeIfExists(ISD::BUILD_VECTOR, DAG.getVTList(VT), {Ops}); + if (NV && !NV->use_empty()) { + V.setNode(NV); + return true; + } + } return false; }; diff --git a/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll b/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll --- a/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll +++ b/llvm/test/CodeGen/X86/fma-fneg-combine-2.ll @@ -130,16 +130,18 @@ ; FMA3-LABEL: negated_constant_v4f64: ; FMA3: # %bb.0: ; FMA3-NEXT: vmovapd {{.*#+}} ymm2 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] -; FMA3-NEXT: vfmadd213pd {{.*#+}} ymm2 = (ymm0 * ymm2) + ymm1 -; FMA3-NEXT: vfmadd231pd {{.*#+}} ymm1 = (ymm0 * mem) + ymm1 -; FMA3-NEXT: vaddpd %ymm1, %ymm2, %ymm0 +; FMA3-NEXT: vmovapd %ymm2, %ymm3 +; FMA3-NEXT: vfmadd213pd {{.*#+}} ymm3 = (ymm0 * ymm3) + ymm1 +; FMA3-NEXT: vfnmadd213pd {{.*#+}} ymm2 = -(ymm0 * ymm2) + ymm1 +; FMA3-NEXT: vaddpd %ymm2, %ymm3, %ymm0 ; FMA3-NEXT: retq ; ; FMA4-LABEL: negated_constant_v4f64: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddpd {{.*#+}} ymm2 = (ymm0 * mem) + ymm1 -; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 -; FMA4-NEXT: vaddpd %ymm0, %ymm2, %ymm0 +; FMA4-NEXT: vmovapd {{.*#+}} ymm2 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; FMA4-NEXT: vfmaddpd {{.*#+}} ymm3 = (ymm0 * ymm2) + ymm1 +; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 +; FMA4-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ; FMA4-NEXT: retq %3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %0, <4 x double> , <4 x double> %1) %4 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %0, <4 x double> , <4 x double> %1)