Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40622,16 +40622,23 @@
   if (N->getOpcode() == ISD::FNEG)
     return N->getOperand(0);
 
+  unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();
+
   SDValue Op = peekThroughBitcasts(SDValue(N, 0));
-  auto VT = Op->getValueType(0);
+  EVT VT = Op->getValueType(0);
+  // Make sure the element size doesn't change.
+  if (VT.getScalarSizeInBits() != ScalarSize)
+    return SDValue();
+
   if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) {
     // For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate
     // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
     if (!SVOp->getOperand(1).isUndef())
       return SDValue();
     if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode()))
-      return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
-                                  SVOp->getMask());
+      if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
+        return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
+                                    SVOp->getMask());
     return SDValue();
   }
   unsigned Opc = Op.getOpcode();
@@ -40643,19 +40650,17 @@
     if (!InsVector.isUndef())
       return SDValue();
     if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode()))
-      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
-                         NegInsVal, Op.getOperand(2));
+      if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
+        return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
+                           NegInsVal, Op.getOperand(2));
     return SDValue();
   }
 
   if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB)
     return SDValue();
 
-  SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
-  if (!Op1.getValueType().isFloatingPoint())
-    return SDValue();
-
-  SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
+  SDValue Op1 = Op.getOperand(1);
+  SDValue Op0 = Op.getOperand(0);
 
   // For XOR and FXOR, we want to check if constant bits of Op1 are sign bit
   // masks. For FSUB, we have to check if constant bits of Op0 are sign bit
@@ -40667,7 +40672,7 @@
   SmallVector<APInt, 16> EltBits;
   // Extract constant bits and see if they are all sign bit masks. Ignore the
   // undef elements.
-  if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
+  if (getTargetConstantBitsFromNode(Op1, ScalarSize,
                                     UndefElts, EltBits,
                                     /* AllowWholeUndefs */ true,
                                     /* AllowPartialUndefs */ false)) {
@@ -41693,6 +41698,10 @@
   if (!NegVal)
     return SDValue();
 
+  // FIXME: Should we bitcast instead?
+  if (NegVal.getValueType() != VT)
+    return SDValue();
+
   unsigned NewOpcode;
   switch (N->getOpcode()) {
   default: llvm_unreachable("Unexpected opcode!");
Index: llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
===================================================================
--- llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -5763,16 +5763,14 @@
 ; X86-LABEL: test_mm_mask_fmsub_round_sd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    vxorpd {{\.LCPI.*}}, %xmm2, %xmm2
 ; X86-NEXT:    kmovw %eax, %k1
-; X86-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT:    vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_fmsub_round_sd:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vxorpd {{.*}}(%rip), %xmm2, %xmm2
 ; X64-NEXT:    kmovw %edi, %k1
-; X64-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT:    vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X64-NEXT:    retq
 entry:
   %0 = extractelement <2 x double> %__W, i64 0
@@ -5817,16 +5815,14 @@
 ; X86-LABEL: test_mm_maskz_fmsub_round_sd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    vxorpd {{\.LCPI.*}}, %xmm2, %xmm2
 ; X86-NEXT:    kmovw %eax, %k1
-; X86-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X86-NEXT:    vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_fmsub_round_sd:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vxorpd {{.*}}(%rip), %xmm2, %xmm2
 ; X64-NEXT:    kmovw %edi, %k1
-; X64-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT:    vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
   %0 = extractelement <2 x double> %__A, i64 0
@@ -5874,19 +5870,15 @@
 ; X86-LABEL: test_mm_mask3_fmsub_round_sd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    vxorpd {{\.LCPI.*}}, %xmm2, %xmm3
-; X86-NEXT:    vfmadd213sd {rn-sae}, %xmm3, %xmm0, %xmm1
 ; X86-NEXT:    kmovw %eax, %k1
-; X86-NEXT:    vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; X86-NEXT:    vfmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask3_fmsub_round_sd:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vxorpd {{.*}}(%rip), %xmm2, %xmm3
-; X64-NEXT:    vfmadd213sd {rn-sae}, %xmm3, %xmm0, %xmm1
 ; X64-NEXT:    kmovw %edi, %k1
-; X64-NEXT:    vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; X64-NEXT:    vfmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X64-NEXT:    vmovapd %xmm2, %xmm0
 ; X64-NEXT:    retq
 entry:
@@ -5933,16 +5925,14 @@
 ; X86-LABEL: test_mm_mask_fnmadd_round_sd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    vxorpd {{\.LCPI.*}}, %xmm1, %xmm1
 ; X86-NEXT:    kmovw %eax, %k1
-; X86-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT:    vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask_fnmadd_round_sd:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vxorpd {{.*}}(%rip), %xmm1, %xmm1
 ; X64-NEXT:    kmovw %edi, %k1
-; X64-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT:    vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
 ; X64-NEXT:    retq
 entry:
   %0 = extractelement <2 x double> %__W, i64 0
@@ -5987,16 +5977,14 @@
 ; X86-LABEL: test_mm_maskz_fnmadd_round_sd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    vxorpd {{\.LCPI.*}}, %xmm1, %xmm1
 ; X86-NEXT:    kmovw %eax, %k1
-; X86-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X86-NEXT:    vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_maskz_fnmadd_round_sd:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vxorpd {{.*}}(%rip), %xmm1, %xmm1
 ; X64-NEXT:    kmovw %edi, %k1
-; X64-NEXT:    vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT:    vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
 ; X64-NEXT:    retq
 entry:
   %0 = extractelement <2 x double> %__A, i64 0
@@ -6044,17 +6032,15 @@
 ; X86-LABEL: test_mm_mask3_fnmadd_round_sd:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    vxorpd {{\.LCPI.*}}, %xmm1, %xmm1
 ; X86-NEXT:    kmovw %eax, %k1
-; X86-NEXT:    vfmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; X86-NEXT:    vfnmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X86-NEXT:    vmovapd %xmm2, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_mask3_fnmadd_round_sd:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vxorpd {{.*}}(%rip), %xmm1, %xmm1
 ; X64-NEXT:    kmovw %edi, %k1
-; X64-NEXT:    vfmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; X64-NEXT:    vfnmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
 ; X64-NEXT:    vmovapd %xmm2, %xmm0
 ; X64-NEXT:    retq
 entry: