diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47795,7 +47795,7 @@ // FIXME: How do we handle when fast math flags of FADD are different from // CFMUL's? SDValue CFmul = - DAG.getNode(NewOp, SDLoc(N), CVT, FAddOp1, MulOp0, MulOp1, N->getFlags()); + DAG.getNode(NewOp, SDLoc(N), CVT, MulOp0, MulOp1, FAddOp1, N->getFlags()); return DAG.getBitcast(VT, CFmul); } diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll --- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll @@ -6,8 +6,7 @@ define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) { ; NO-SZ-LABEL: test1: ; NO-SZ: # %bb.0: # %entry -; NO-SZ-NEXT: vfcmaddcph %zmm1, %zmm0, %zmm2 -; NO-SZ-NEXT: vmovaps %zmm2, %zmm0 +; NO-SZ-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm0 ; NO-SZ-NEXT: retq ; ; HAS-SZ-LABEL: test1: @@ -28,8 +27,7 @@ define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half> %b) { ; NO-SZ-LABEL: test2: ; NO-SZ: # %bb.0: # %entry -; NO-SZ-NEXT: vfmaddcph %zmm1, %zmm0, %zmm2 -; NO-SZ-NEXT: vmovaps %zmm2, %zmm0 +; NO-SZ-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0 ; NO-SZ-NEXT: retq ; ; HAS-SZ-LABEL: test2: @@ -50,8 +48,7 @@ define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half> %b) { ; NO-SZ-LABEL: test3: ; NO-SZ: # %bb.0: # %entry -; NO-SZ-NEXT: vfcmaddcph %ymm1, %ymm0, %ymm2 -; NO-SZ-NEXT: vmovaps %ymm2, %ymm0 +; NO-SZ-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm0 ; NO-SZ-NEXT: retq ; ; HAS-SZ-LABEL: test3: @@ -72,8 +69,7 @@ define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half> %b) { ; NO-SZ-LABEL: test4: ; NO-SZ: # %bb.0: # %entry -; NO-SZ-NEXT: vfmaddcph %ymm1, %ymm0, %ymm2 -; NO-SZ-NEXT: vmovaps %ymm2, %ymm0 +; NO-SZ-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0 ; NO-SZ-NEXT: retq ; ; HAS-SZ-LABEL: test4: @@ -94,8 +90,7 @@ define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b) { ; NO-SZ-LABEL: test5: ; NO-SZ: # %bb.0: # %entry -; NO-SZ-NEXT: vfcmaddcph %xmm1, %xmm0, %xmm2 -; NO-SZ-NEXT: vmovaps %xmm2, %xmm0 +; NO-SZ-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm0 ; NO-SZ-NEXT: retq ; ; HAS-SZ-LABEL: test5: @@ -116,8 +111,7 @@ define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b) { ; NO-SZ-LABEL: test6: ; NO-SZ: # %bb.0: # %entry -; NO-SZ-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2 -; NO-SZ-NEXT: vmovaps %xmm2, %xmm0 +; NO-SZ-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0 ; NO-SZ-NEXT: retq ; ; HAS-SZ-LABEL: test6: @@ -139,8 +133,7 @@ define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x half> %b) { ; CHECK-LABEL: test13: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %zmm1, %zmm0, %zmm2 -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <32 x half> %a to <16 x float> @@ -154,8 +147,7 @@ define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x half> %b) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfmaddcph %zmm1, %zmm0, %zmm2 -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <32 x half> %a to <16 x float> @@ -169,8 +161,7 @@ define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x half> %b) { ; CHECK-LABEL: test15: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %ymm1, %ymm0, %ymm2 -; CHECK-NEXT: vmovaps %ymm2, %ymm0 +; CHECK-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm0 ; CHECK-NEXT: retq entry: %0 = bitcast <16 x half> %a to <8 x float> @@ -184,8 +175,7 @@ define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x half> %b) { ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfmaddcph %ymm1, %ymm0, %ymm2 -; CHECK-NEXT: vmovaps %ymm2, %ymm0 +; CHECK-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0 ; CHECK-NEXT: retq entry: %0 = bitcast <16 x half> %a to <8 x float> @@ -199,8 +189,7 @@ define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: test17: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vmovaps %xmm2, %xmm0 +; CHECK-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <8 x half> %a to <4 x float> @@ -214,8 +203,7 @@ define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> %b) { ; CHECK-LABEL: test18: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vmovaps %xmm2, %xmm0 +; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <8 x half> %a to <4 x float> diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll --- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll @@ -4,8 +4,7 @@ define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce, <32 x half> %rhs.coerce) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfmaddcph %zmm1, %zmm0, %zmm2 -; CHECK-NEXT: vmovaps %zmm2, %zmm0 +; CHECK-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <32 x half> %lhs.coerce to <16 x float> @@ -19,8 +18,7 @@ define dso_local <16 x half> @test2(<16 x half> %acc.coerce, <16 x half> %lhs.coerce, <16 x half> %rhs.coerce) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfmaddcph %ymm1, %ymm0, %ymm2 -; CHECK-NEXT: vmovaps %ymm2, %ymm0 +; CHECK-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0 ; CHECK-NEXT: retq entry: %0 = bitcast <16 x half> %lhs.coerce to <8 x float> @@ -34,8 +32,7 @@ define dso_local <8 x half> @test3(<8 x half> %acc.coerce, <8 x half> %lhs.coerce, <8 x half> %rhs.coerce) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vmovaps %xmm2, %xmm0 +; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <8 x half> %lhs.coerce to <4 x float> @@ -50,8 +47,7 @@ define dso_local <8 x half> @test4(<8 x half> %acc.coerce, <8 x half> %lhs.coerce, <8 x half> %rhs.coerce) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vmovaps %xmm2, %xmm0 +; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <8 x half> %lhs.coerce to <4 x float> diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll --- a/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll +++ b/llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll @@ -4,8 +4,7 @@ define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %zmm2, %zmm0, %zmm1 -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vfcmaddcph %zmm1, %zmm2, %zmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <32 x half> %lhs.coerce.conj to <16 x i32> @@ -21,8 +20,7 @@ define dso_local <32 x half> @test2(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %zmm2, %zmm0, %zmm1 -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vfcmaddcph %zmm1, %zmm2, %zmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <32 x half> %lhs.coerce.conj to <16 x i32> @@ -38,8 +36,7 @@ define dso_local <16 x half> @test3(<16 x half> %acc.coerce, <16 x half> %lhs.coerce.conj, <16 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %ymm2, %ymm0, %ymm1 -; CHECK-NEXT: vmovaps %ymm1, %ymm0 +; CHECK-NEXT: vfcmaddcph %ymm1, %ymm2, %ymm0 ; CHECK-NEXT: retq entry: %0 = bitcast <16 x half> %lhs.coerce.conj to <8 x i32> @@ -55,8 +52,7 @@ define dso_local <8 x half> @test4(<8 x half> %acc.coerce, <8 x half> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %xmm2, %xmm0, %xmm1 -; CHECK-NEXT: vmovaps %xmm1, %xmm0 +; CHECK-NEXT: vfcmaddcph %xmm1, %xmm2, %xmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <8 x half> %lhs.coerce.conj to <4 x i32> @@ -72,8 +68,7 @@ define dso_local <8 x half> @test5(<8 x half> %acc.coerce, <8 x half> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %xmm2, %xmm0, %xmm1 -; CHECK-NEXT: vmovaps %xmm1, %xmm0 +; CHECK-NEXT: vfcmaddcph %xmm1, %xmm2, %xmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <8 x half> %lhs.coerce.conj to <4 x i32> @@ -90,8 +85,7 @@ ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1 -; CHECK-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2 -; CHECK-NEXT: vmovaps %xmm2, %xmm0 +; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <8 x half> %lhs.coerce.conj to <4 x i32> @@ -107,8 +101,7 @@ define dso_local <8 x half> @test7(<8 x half> %acc.coerce, <8 x half> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %xmm2, %xmm0, %xmm1 -; CHECK-NEXT: vmovaps %xmm1, %xmm0 +; CHECK-NEXT: vfcmaddcph %xmm1, %xmm2, %xmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <8 x half> %lhs.coerce.conj to <4 x i32> @@ -124,8 +117,7 @@ define dso_local <8 x half> @test8(<8 x half> %acc.coerce, <4 x float> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %xmm2, %xmm0, %xmm1 -; CHECK-NEXT: vmovaps %xmm1, %xmm0 +; CHECK-NEXT: vfcmaddcph %xmm1, %xmm2, %xmm0 ; CHECK-NEXT: retq entry: %0 = bitcast <4 x float> %lhs.coerce.conj to <4 x i32> @@ -141,8 +133,7 @@ define dso_local <32 x half> @test9(<32 x half> %acc.coerce, <8 x i64> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 { ; CHECK-LABEL: test9: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfcmaddcph %zmm2, %zmm0, %zmm1 -; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: vfcmaddcph %zmm1, %zmm2, %zmm0 ; CHECK-NEXT: retq entry: %xor1.i = xor <8 x i64> %lhs.coerce.conj,