diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -440,8 +440,8 @@
 def fmul       : SDNode<"ISD::FMUL"       , SDTFPBinOp, [SDNPCommutative]>;
 def fdiv       : SDNode<"ISD::FDIV"       , SDTFPBinOp>;
 def frem       : SDNode<"ISD::FREM"       , SDTFPBinOp>;
-def fma        : SDNode<"ISD::FMA"        , SDTFPTernaryOp>;
-def fmad       : SDNode<"ISD::FMAD"       , SDTFPTernaryOp>;
+def fma        : SDNode<"ISD::FMA"        , SDTFPTernaryOp, [SDNPCommutative]>;
+def fmad       : SDNode<"ISD::FMAD"       , SDTFPTernaryOp, [SDNPCommutative]>;
 def fabs       : SDNode<"ISD::FABS"       , SDTFPUnaryOp>;
 def fminnum    : SDNode<"ISD::FMINNUM"    , SDTFPBinOp,
                                   [SDNPCommutative, SDNPAssociative]>;
@@ -498,7 +498,7 @@
 def strict_frem      : SDNode<"ISD::STRICT_FREM",
                               SDTFPBinOp, [SDNPHasChain]>;
 def strict_fma       : SDNode<"ISD::STRICT_FMA",
-                              SDTFPTernaryOp, [SDNPHasChain]>;
+                              SDTFPTernaryOp, [SDNPHasChain, SDNPCommutative]>;
 def strict_fsqrt     : SDNode<"ISD::STRICT_FSQRT",
                               SDTFPUnaryOp, [SDNPHasChain]>;
 def strict_fsin      : SDNode<"ISD::STRICT_FSIN",
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3738,18 +3738,6 @@
 def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
           (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
 
-// And here "(-a) + b*(-c)"
-
-let Predicates = [HasNEON, HasFullFP16] in
-def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))),
-          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
-
-def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
-          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-
-def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
-          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-
 //===----------------------------------------------------------------------===//
 // Floating point comparison instructions.
 //===----------------------------------------------------------------------===//
@@ -4067,17 +4055,6 @@
 defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
       TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
 
-// The following def pats catch the case where the LHS of an FMA is negated.
-// The TriOpFrag above catches the case where the middle operand is negated.
-def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
-          (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;
-
-def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
-          (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;
-
-def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
-          (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
-
 defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
 defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
 defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3684,16 +3684,10 @@
   if fms then {
     def : Pat<(VTI.Vec (fma (fneg m1), m2, add)),
               (Inst $add, $m1, $m2)>;
-    def : Pat<(VTI.Vec (fma m1, (fneg m2), add)),
-              (Inst $add, $m1, $m2)>;
     def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
                                 (VTI.Vec (fma (fneg m1), m2, add)),
                                 add)),
              (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
-    def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
-                                (VTI.Vec (fma m1, (fneg m2), add)),
-                                add)),
-              (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
     def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
               (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
     def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2264,16 +2264,6 @@
 def : Pat<(f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
           (VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
       Requires<[HasFullFP16]>;
-// (fma x, (fneg y), z) -> (vfms z, x, y)
-def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
-          (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
-      Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
-          (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
-      Requires<[HasVFP4]>;
-def : Pat<(f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin))),
-          (VFMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
-      Requires<[HasFullFP16]>;
 
 def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
                   (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -2391,16 +2381,6 @@
 def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin)))),
           (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
       Requires<[HasFullFP16]>;
-// (fneg (fma x, (fneg y), z) -> (vfnms z, x, y)
-def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
-          (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
-      Requires<[HasVFP4,HasDPVFP]>;
-def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
-          (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
-      Requires<[HasVFP4]>;
-def : Pat<(fneg (f16 (fma (f16 HPR:$Sn), (fneg (f16 HPR:$Sm)), (f16 HPR:$Sdin)))),
-          (VFNMSH (f16 HPR:$Sdin), (f16 HPR:$Sn), (f16 HPR:$Sm))>,
-      Requires<[HasFullFP16]>;
 
 //===----------------------------------------------------------------------===//
 // FP Conditional moves.
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -1708,8 +1708,6 @@
          (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
 def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
          (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
-def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx),
-         (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>;
 
 def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
          (PS_vmulw V2I32:$Rs, V2I32:$Rt)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -276,14 +276,10 @@
 // fnmsub: -rs1 * rs2 + rs3
 def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
           (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
-def : Pat<(fma FPR64:$rs1, (fneg FPR64:$rs2), FPR64:$rs3),
-          (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
 
 // fnmadd: -rs1 * rs2 - rs3
 def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
           (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
-def : Pat<(fma FPR64:$rs1, (fneg FPR64:$rs2), (fneg FPR64:$rs3)),
-          (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
 
 // The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
 // canonical NaN when giving a signaling NaN. This doesn't match the LLVM
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -332,14 +332,10 @@
 // fnmsub: -rs1 * rs2 + rs3
 def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
           (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
-def : Pat<(fma FPR32:$rs1, (fneg FPR32:$rs2), FPR32:$rs3),
-          (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
 
 // fnmadd: -rs1 * rs2 - rs3
 def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
           (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
-def : Pat<(fma FPR32:$rs1, (fneg FPR32:$rs2), (fneg FPR32:$rs3)),
-          (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
 
 // The RISC-V 2.2 user-level ISA spec defines fmin and fmax as returning the
 // canonical NaN when given a signaling NaN. This doesn't match the LLVM
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6533,7 +6533,7 @@
                                    avx512vl_f64_info, "PD">, VEX_W;
 }
 
-defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
                                     X86Fmadd, X86FmaddRnd>;
 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
                                     X86Fmsub, X86FmsubRnd>;
@@ -6624,7 +6624,7 @@
                                    avx512vl_f64_info, "PD">, VEX_W;
 }
 
-defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
                                     X86Fmadd, X86FmaddRnd>;
 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
                                     X86Fmsub, X86FmsubRnd>;
@@ -6716,7 +6716,7 @@
                                    avx512vl_f64_info, "PD">, VEX_W;
 }
 
-defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
                                     X86Fmadd, X86FmaddRnd>;
 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
                                     X86Fmsub, X86FmsubRnd>;
@@ -6819,7 +6819,7 @@
   }
 }
 
-defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", any_fma, X86FmaddRnd>;
 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
@@ -7027,7 +7027,7 @@
   }
 }
 
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD",
+defm : avx512_scalar_fma_patterns<any_fma, X86FmaddRnd, "VFMADD",
                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86FmsubRnd, "VFMSUB",
                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
@@ -7036,7 +7036,7 @@
 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86FnmsubRnd, "VFNMSUB",
                                   "SS", X86Movss, v4f32x_info, fp32imm0>;
 
-defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD",
+defm : avx512_scalar_fma_patterns<any_fma, X86FmaddRnd, "VFMADD",
                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86FmsubRnd, "VFMSUB",
                                   "SD", X86Movsd, v2f64x_info, fp64imm0>;
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -123,7 +123,7 @@
 // Fused Multiply-Add
 let ExeDomain = SSEPackedSingle in {
   defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
-                            loadv4f32, loadv8f32, X86any_Fmadd, v4f32, v8f32,
+                            loadv4f32, loadv8f32, any_fma, v4f32, v8f32,
                             SchedWriteFMA>;
   defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
                             loadv4f32, loadv8f32, X86any_Fmsub, v4f32, v8f32,
@@ -138,7 +138,7 @@
 
 let ExeDomain = SSEPackedDouble in {
   defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
-                            loadv2f64, loadv4f64, X86any_Fmadd, v2f64,
+                            loadv2f64, loadv4f64, any_fma, v2f64,
                             v4f64, SchedWriteFMA>, VEX_W;
   defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
                             loadv2f64, loadv4f64, X86any_Fmsub, v2f64,
@@ -319,7 +319,7 @@
                    VR128, sdmem, sched>, VEX_W;
 }
 
-defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86any_Fmadd,
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", any_fma,
                     SchedWriteFMA.Scl>, VEX_LIG;
 defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86any_Fmsub,
                     SchedWriteFMA.Scl>, VEX_LIG;
@@ -372,12 +372,12 @@
   }
 }
 
-defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma_patterns<any_fma, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
 defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
 defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
 defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
 
-defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma_patterns<any_fma, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
 defm : scalar_fma_patterns<X86any_Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
 defm : scalar_fma_patterns<X86any_Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
 defm : scalar_fma_patterns<X86any_Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
@@ -538,7 +538,7 @@
 
 let ExeDomain = SSEPackedSingle in {
   // Scalar Instructions
-  defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86any_Fmadd, loadf32,
+  defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, any_fma, loadf32,
                          SchedWriteFMA.Scl>,
                    fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
                              SchedWriteFMA.Scl>;
@@ -555,7 +555,7 @@
                    fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
                              SchedWriteFMA.Scl>;
   // Packed Instructions
-  defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86any_Fmadd, v4f32, v8f32,
+  defm VFMADDPS4 : fma4p<0x68, "vfmaddps", any_fma, v4f32, v8f32,
                          loadv4f32, loadv8f32, SchedWriteFMA>;
   defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86any_Fmsub, v4f32, v8f32,
                          loadv4f32, loadv8f32, SchedWriteFMA>;
@@ -571,7 +571,7 @@
 
 let ExeDomain = SSEPackedDouble in {
   // Scalar Instructions
-  defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86any_Fmadd, loadf64,
+  defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, any_fma, loadf64,
                          SchedWriteFMA.Scl>,
                    fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
                              SchedWriteFMA.Scl>;
@@ -588,7 +588,7 @@
                    fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
                              SchedWriteFMA.Scl>;
   // Packed Instructions
-  defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86any_Fmadd, v2f64, v4f64,
+  defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", any_fma, v2f64, v4f64,
                          loadv2f64, loadv4f64, SchedWriteFMA>;
   defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86any_Fmsub, v2f64, v4f64,
                          loadv2f64, loadv4f64, SchedWriteFMA>;
@@ -629,12 +629,12 @@
   }
 }
 
-defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
 defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
 defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
 defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
 
-defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
 defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
 defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
 defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -540,9 +540,6 @@
 def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
 def X86strict_Fmadd : SDNode<"ISD::STRICT_FMA", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
-def X86any_Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
-                            [(X86strict_Fmadd node:$src1, node:$src2, node:$src3),
-                             (X86Fmadd node:$src1, node:$src2, node:$src3)]>;
 def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
 def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
 def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-with-no-legality-check.mir
@@ -279,8 +279,8 @@
     ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
    ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load 1)
-    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
-    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY2]], 0, 0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[LDRBui]]
+    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 0
    ; CHECK: $noreg = PATCHABLE_RET [[UBFMWri]]
    %2:gpr(p0) = COPY $x0
    %0:fpr(s1) = G_LOAD %2(p0) :: (load 1)
@@ -544,7 +544,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s2
    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[FNMADDSrrr:%[0-9]+]]:fpr32 = FNMADDSrrr [[COPY2]], [[COPY1]], [[COPY]]
+    ; CHECK: [[FNMADDSrrr:%[0-9]+]]:fpr32 = FNMADDSrrr [[COPY1]], [[COPY2]], [[COPY]]
    ; CHECK: $noreg = PATCHABLE_RET [[FNMADDSrrr]]
    %5:fpr(s32) = COPY $s2
    %4:fpr(s32) = COPY $s1
@@ -581,7 +581,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2
    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
    ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[FNMADDDrrr:%[0-9]+]]:fpr64 = FNMADDDrrr [[COPY2]], [[COPY1]], [[COPY]]
+    ; CHECK: [[FNMADDDrrr:%[0-9]+]]:fpr64 = FNMADDDrrr [[COPY1]], [[COPY2]], [[COPY]]
    ; CHECK: $noreg = PATCHABLE_RET [[FNMADDDrrr]]
    %5:fpr(s64) = COPY $d2
    %4:fpr(s64) = COPY $d1
@@ -1083,7 +1083,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d2
    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY $d1
    ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[FMLSv2f32_:%[0-9]+]]:fpr64 = FMLSv2f32 [[COPY1]], [[COPY]], [[COPY2]]
+    ; CHECK: [[FMLSv2f32_:%[0-9]+]]:fpr64 = FMLSv2f32 [[COPY1]], [[COPY2]], [[COPY]]
    ; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f32_]]
    %4:fpr(<2 x s32>) = COPY $d2
    %3:fpr(<2 x s32>) = COPY $d1
@@ -1118,7 +1118,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2
    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
    ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[FMLSv4f32_:%[0-9]+]]:fpr128 = FMLSv4f32 [[COPY1]], [[COPY]], [[COPY2]]
+    ; CHECK: [[FMLSv4f32_:%[0-9]+]]:fpr128 = FMLSv4f32 [[COPY1]], [[COPY2]], [[COPY]]
    ; CHECK: $noreg = PATCHABLE_RET [[FMLSv4f32_]]
    %4:fpr(<4 x s32>) = COPY $q2
    %3:fpr(<4 x s32>) = COPY $q1
@@ -1153,7 +1153,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q2
    ; CHECK: [[COPY1:%[0-9]+]]:fpr128 = COPY $q1
    ; CHECK: [[COPY2:%[0-9]+]]:fpr128 = COPY $q0
-    ; CHECK: [[FMLSv2f64_:%[0-9]+]]:fpr128 = FMLSv2f64 [[COPY1]], [[COPY]], [[COPY2]]
+    ; CHECK: [[FMLSv2f64_:%[0-9]+]]:fpr128 = FMLSv2f64 [[COPY1]], [[COPY2]], [[COPY]]
    ; CHECK: $noreg = PATCHABLE_RET [[FMLSv2f64_]]
    %4:fpr(<2 x s64>) = COPY $q2
    %3:fpr(<2 x s64>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -725,7 +725,7 @@
 ; CHECK-NEXT:    ldr d1, [x0]
 ; CHECK-NEXT:    ldr d2, [x1]
 ; CHECK-NEXT:    ldr d0, [x2]
-; CHECK-NEXT:    fmls.2s v0, v2, v1
+; CHECK-NEXT:    fmls.2s v0, v1, v2
 ; CHECK-NEXT:    ret
   %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = load <2 x float>, <2 x float>* %B
@@ -741,7 +741,7 @@
 ; CHECK-NEXT:    ldr q1, [x0]
 ; CHECK-NEXT:    ldr q2, [x1]
 ; CHECK-NEXT:    ldr q0, [x2]
-; CHECK-NEXT:    fmls.4s v0, v2, v1
+; CHECK-NEXT:    fmls.4s v0, v1, v2
 ; CHECK-NEXT:    ret
   %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = load <4 x float>, <4 x float>* %B
@@ -757,7 +757,7 @@
 ; CHECK-NEXT:    ldr q1, [x0]
 ; CHECK-NEXT:    ldr q2, [x1]
 ; CHECK-NEXT:    ldr q0, [x2]
-; CHECK-NEXT:    fmls.2d v0, v2, v1
+; CHECK-NEXT:    fmls.2d v0, v1, v2
 ; CHECK-NEXT:    ret
   %tmp1 = load <2 x double>, <2 x double>* %A
   %tmp2 = load <2 x double>, <2 x double>* %B
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
@@ -640,7 +640,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
    ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
    ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
-    ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
    ; CHECK: $d0 = COPY [[VFMSD]]
    ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
    %0(s64) = COPY $d0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
--- a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
@@ -31,7 +31,7 @@
    ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
    ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
    ; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
-    ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+    ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
    ; CHECK: $d0 = COPY [[VFNMSD]]
    ; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $d0
    %0:fprb(s64) = COPY $d0
diff --git a/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll b/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll
--- a/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll
@@ -257,8 +257,8 @@
 define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
 ; CHECK-LABEL: fms2:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr.16 s0, [r0]
-; CHECK-NEXT:    vldr.16 s2, [r1]
+; CHECK-NEXT:    vldr.16 s0, [r1]
+; CHECK-NEXT:    vldr.16 s2, [r0]
 ; CHECK-NEXT:    vldr.16 s4, [r2]
 ; CHECK-NEXT:    vfms.f16 s4, s2, s0
 ; CHECK-NEXT:    vstr.16 s4, [r0]
@@ -266,8 +266,8 @@
 ;
 ; DONT-FUSE-LABEL: fms2:
 ; DONT-FUSE:       @ %bb.0:
-; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
-; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
+; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
+; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
 ; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
 ; DONT-FUSE-NEXT:    vfms.f16 s4, s2, s0
 ; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
@@ -399,8 +399,8 @@
 define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
 ; CHECK-LABEL: fnms3:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    vldr.16 s0, [r1]
-; CHECK-NEXT:    vldr.16 s2, [r0]
+; CHECK-NEXT:    vldr.16 s0, [r0]
+; CHECK-NEXT:    vldr.16 s2, [r1]
 ; CHECK-NEXT:    vldr.16 s4, [r2]
 ; CHECK-NEXT:    vfnms.f16 s4, s2, s0
 ; CHECK-NEXT:    vstr.16 s4, [r0]
@@ -408,8 +408,8 @@
 ;
 ; DONT-FUSE-LABEL: fnms3:
 ; DONT-FUSE:       @ %bb.0:
-; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
-; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
+; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
+; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
 ; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
 ; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
 ; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -543,7 +543,7 @@
 ; RV32IFD-NEXT:    fcvt.d.w ft3, zero
 ; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
 ; RV32IFD-NEXT:    fadd.d ft1, ft1, ft3
-; RV32IFD-NEXT:    fnmadd.d ft0, ft0, ft2, ft1
+; RV32IFD-NEXT:    fnmadd.d ft0, ft2, ft0, ft1
 ; RV32IFD-NEXT:    fsd ft0, 8(sp)
 ; RV32IFD-NEXT:    lw a0, 8(sp)
 ; RV32IFD-NEXT:    lw a1, 12(sp)
@@ -558,7 +558,7 @@
 ; RV64IFD-NEXT:    fmv.d.x ft3, zero
 ; RV64IFD-NEXT:    fadd.d ft2, ft2, ft3
 ; RV64IFD-NEXT:    fadd.d ft1, ft1, ft3
-; RV64IFD-NEXT:    fnmadd.d ft0, ft0, ft2, ft1
+; RV64IFD-NEXT:    fnmadd.d ft0, ft2, ft0, ft1
 ; RV64IFD-NEXT:    fmv.x.d a0, ft0
 ; RV64IFD-NEXT:    ret
   %b_ = fadd double 0.0, %b
@@ -622,7 +622,7 @@
 ; RV32IFD-NEXT:    fld ft2, 8(sp)
 ; RV32IFD-NEXT:    fcvt.d.w ft3, zero
 ; RV32IFD-NEXT:    fadd.d ft2, ft2, ft3
-; RV32IFD-NEXT:    fnmsub.d ft0, ft1, ft2, ft0
+; RV32IFD-NEXT:    fnmsub.d ft0, ft2, ft1, ft0
 ; RV32IFD-NEXT:    fsd ft0, 8(sp)
 ; RV32IFD-NEXT:    lw a0, 8(sp)
 ; RV32IFD-NEXT:    lw a1, 12(sp)
@@ -636,7 +636,7 @@
 ; RV64IFD-NEXT:    fmv.d.x ft2, a1
 ; RV64IFD-NEXT:    fmv.d.x ft3, zero
 ; RV64IFD-NEXT:    fadd.d ft2, ft2, ft3
-; RV64IFD-NEXT:    fnmsub.d ft0, ft1, ft2, ft0
+; RV64IFD-NEXT:    fnmsub.d ft0, ft2, ft1, ft0
 ; RV64IFD-NEXT:    fmv.x.d a0, ft0
 ; RV64IFD-NEXT:    ret
   %b_ = fadd double 0.0, %b
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -405,7 +405,7 @@
 ; RV32IF-NEXT:    fmv.w.x ft3, zero
 ; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
 ; RV32IF-NEXT:    fadd.s ft1, ft1, ft3
-; RV32IF-NEXT:    fnmadd.s ft0, ft0, ft2, ft1
+; RV32IF-NEXT:    fnmadd.s ft0, ft2, ft0, ft1
 ; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
@@ -417,7 +417,7 @@
 ; RV64IF-NEXT:    fmv.w.x ft3, zero
 ; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
 ; RV64IF-NEXT:    fadd.s ft1, ft1, ft3
-; RV64IF-NEXT:    fnmadd.s ft0, ft0, ft2, ft1
+; RV64IF-NEXT:    fnmadd.s ft0, ft2, ft0, ft1
 ; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
   %b_ = fadd float 0.0, %b
@@ -464,7 +464,7 @@
 ; RV32IF-NEXT:    fmv.w.x ft2, a1
 ; RV32IF-NEXT:    fmv.w.x ft3, zero
 ; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
-; RV32IF-NEXT:    fnmsub.s ft0, ft1, ft2, ft0
+; RV32IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
 ; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
@@ -475,7 +475,7 @@
 ; RV64IF-NEXT:    fmv.w.x ft2, a1
 ; RV64IF-NEXT:    fmv.w.x ft3, zero
 ; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
-; RV64IF-NEXT:    fnmsub.s ft0, ft1, ft2, ft0
+; RV64IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
 ; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
   %b_ = fadd float 0.0, %b
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1592,7 +1592,7 @@
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
 ; CHECK-NEXT:    vldrw.u32 q2, [r0], #16
-; CHECK-NEXT:    vfms.f32 q2, q0, q1
+; CHECK-NEXT:    vfms.f32 q2, q1, q0
 ; CHECK-NEXT:    vstrb.8 q2, [r3], #16
 ; CHECK-NEXT:    le lr, .LBB18_3
 ; CHECK-NEXT:  @ %bb.4: @ %while.end
diff --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
--- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
@@ -403,8 +403,8 @@
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    add.w r12, r12, #4
 ; CHECK-NEXT:    vmov q3, q0
-; CHECK-NEXT:    vldrw.u32 q1, [r1], #16
-; CHECK-NEXT:    vldrw.u32 q2, [r0], #16
+; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
+; CHECK-NEXT:    vldrw.u32 q2, [r1], #16
 ; CHECK-NEXT:    vfms.f32 q3, q2, q1
 ; CHECK-NEXT:    vstrw.32 q3, [r2], #16
 ; CHECK-NEXT:    letp lr, .LBB6_2
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll
@@ -82,7 +82,7 @@
 define arm_aapcs_vfpcc <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; CHECK-LABEL: test_vfmsq_f16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vfms.f16 q0, q1, q2
+; CHECK-NEXT:    vfms.f16 q0, q2, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = fneg <8 x half> %c
@@ -93,7 +93,7 @@
 define arm_aapcs_vfpcc <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
 ; CHECK-LABEL: test_vfmsq_f32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vfms.f32 q0, q1, q2
+; CHECK-NEXT:    vfms.f32 q0, q2, q1
 ; CHECK-NEXT:    bx lr
 entry:
   %0 = fneg <4 x float> %c
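Note on what the flag change enables (a hedged sketch, not part of the patch itself): with ISD::FMA and ISD::STRICT_FMA tagged SDNPCommutative, the TableGen-generated matchers try both orders of the first two fma operands, so a single (fma (fneg x), y, z) pattern also covers (fma x, (fneg y), z) — which is why each deleted pattern above is redundant with its surviving twin. A minimal IR example (hypothetical function, not one of the tests touched above) that should still select RISC-V fnmsub.s through the one remaining FNMSUB_S pattern:

; fma(a, -b, c) = -(a*b) + c, i.e. fnmsub. The fneg sits on the *second*
; multiplicand; the commuted match of (fma (fneg rs1), rs2, rs3) picks it up.
define float @fnmsub_commuted(float %a, float %b, float %c) nounwind {
  %negb = fneg float %b
  %r = call float @llvm.fma.f32(float %a, float %negb, float %c)
  ret float %r
}
declare float @llvm.fma.f32(float, float, float)

The updated CHECK lines in the tests above reflect the same effect on the other targets: the instruction chosen is unchanged, only the order of the multiplicand registers shifts.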