This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/trunk/
-
trunk/
-
lib/CodeGen/SelectionDAG/
-
CodeGen/
-
SelectionDAG/
-
DAGCombiner.cpp
-
test/CodeGen/
-
CodeGen/
-
AMDGPU/
-
fneg-fabs.f16.ll
-
X86/
-
fma_patterns.ll
-
fma_patterns_wide.ll
-
fp-fold.ll

Differential D62963

[DAGCombine] GetNegatedExpression - constant float vector support (PR42105)
ClosedPublic

Authored by RKSimon on Jun 6 2019, 8:50 AM.

Download Raw Diff

Details

Reviewers

arsenm
spatel
craig.topper
cameron.mcinally

Commits

rG287e78c82bd7: [DAGCombine] GetNegatedExpression - constant float vector support (PR42105)
rL363040: [DAGCombine] GetNegatedExpression - constant float vector support (PR42105)

Summary

Add support for negation of constant build vectors.

Diff Detail

Repository: rL LLVM

Event Timeline

RKSimon created this revision. · Jun 6 2019, 8:50 AM

Herald added a project: Restricted Project. · View Herald Transcript · Jun 6 2019, 8:50 AM

Herald added subscribers: nhaehnle, wdng, jvesely. · View Herald Transcript

RKSimon added a reviewer: cameron.mcinally. · Jun 7 2019, 7:05 AM

I haven't enabled handling of undefs in build vector - but can anyone think of a reason that I shouldn't?

Yeah, I think undef should be handled. InstSimplify (and other IR passes IIRC) do it:

define <2 x float> @fsub_-0_-0_x_vec_undef_elts(<2 x float> %a) {
; CHECK-LABEL: @fsub_-0_-0_x_vec_undef_elts(
; CHECK-NEXT:    ret <2 x float> [[A:%.*]]
;
  %t1 = fsub <2 x float> <float undef, float -0.0>, %a
  %ret = fsub <2 x float> <float -0.0, float undef>, %t1
  ret <2 x float> %ret
}

Added undef handling

LGTM, although I'm not fluent in AMDGPU assembly. You may want to wait a little while to see if an expert comes along.

lib/CodeGen/SelectionDAG/DAGCombiner.cpp
913 ↗	(On Diff #203570)	Nit: I don't anticipate any regressions from this change, but this could be split-off to a separate patch -- if we're being pedantic.

This revision is now accepted and ready to land. · Jun 7 2019, 12:19 PM

@arsenm Any comments regarding the amdgpu changes?

Closed by commit rL363040: [DAGCombine] GetNegatedExpression - constant float vector support (PR42105) (authored by RKSimon). · Explain Why · Jun 11 2019, 2:41 AM

This revision was automatically updated to reflect the committed changes.

This breaks (at least) PowerPC with the typical DAG Combine cycle (i.e. one combine undoes the other in a cycle). Here's a minimal test case to show this:

define dso_local <4 x double> @sub(double %b, double* nocapture readonly %ptr) local_unnamed_addr {
entry:
  %arrayidx = getelementptr inbounds double, double* %ptr, i64 45320
  %0 = load double, double* %arrayidx, align 4
  %vecinit = insertelement <4 x double> undef, double %0, i32 0
  %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 176
  %1 = load double, double* %arrayidx1, align 4
  %vecinit2 = insertelement <4 x double> %vecinit, double %1, i32 1
  %arrayidx3 = getelementptr inbounds double, double* %ptr, i64 2734
  %2 = load double, double* %arrayidx3, align 4
  %vecinit4 = insertelement <4 x double> %vecinit2, double %2, i32 2
  %arrayidx5 = getelementptr inbounds double, double* %ptr, i64 7
  %3 = load double, double* %arrayidx5, align 4
  %vecinit6 = insertelement <4 x double> %vecinit4, double %3, i32 3
  %splat.splatinsert = insertelement <4 x double> undef, double %b, i32 0
  %splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer
  %div = fdiv fast <4 x double> %vecinit6, %splat.splat
  %sub = fsub fast <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, %div
  ret <4 x double> %sub
}

Compile with llc -mtriple=powerpc64le-unknown-unknown

Same issue happens with arm64, aarch64, nvptx triples.

Reduced:

define <4 x double> @sub(double %a0, <4 x double> %a1) {
entry:
  %splat.splatinsert = insertelement <4 x double> undef, double %a0, i32 0
  %splat.splat = shufflevector <4 x double> %splat.splatinsert, <4 x double> undef, <4 x i32> zeroinitializer
  %div = fdiv fast <4 x double> %a1, %splat.splat
  %sub = fsub fast <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, %div
  ret <4 x double> %sub
}

RKSimon mentioned this in rL364326: [DAGCombine] combineRepeatedFPDivisors - recognize -1.0 / X as a reciprocal. · Jun 25 2019, 9:03 AM

RKSimon mentioned this in rG9762b26032c8: [DAGCombine] combineRepeatedFPDivisors - recognize -1.0 / X as a reciprocal.

Revision Contents

Path

Size

llvm/

trunk/

lib/

CodeGen/

SelectionDAG/

DAGCombiner.cpp

49 lines

test/

CodeGen/

AMDGPU/

fneg-fabs.f16.ll

17 lines

X86/

fma_patterns.ll

36 lines

fma_patterns_wide.ll

56 lines

fp-fold.ll

6 lines

Diff 203994

llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 793 Lines • ▼ Show 20 Lines	if (!LegalOperations)
return 1;		return 1;

// Don't invert constant FP values after legalization unless the target says		// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.		// the negated constant is legal.
return TLI.isOperationLegal(ISD::ConstantFP, VT) ||		return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,		TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
ForCodeSize);		ForCodeSize);
}		}
		case ISD::BUILD_VECTOR: {
		// Only permit BUILD_VECTOR of constants.
		if (llvm::any_of(Op->op_values(), [&](SDValue N) {
		return !N.isUndef() && !isa<ConstantFPSDNode>(N);
		}))
		return 0;
		if (!LegalOperations)
		return 1;
		if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
		TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
		return 1;
		return llvm::all_of(Op->op_values(), [&](SDValue N) {
		return N.isUndef() ||
		TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
		ForCodeSize);
		});
		}
case ISD::FADD:		case ISD::FADD:
if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())		if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
return 0;		return 0;

// After operation legalization, it might not be legal to create new FSUBs.		// After operation legalization, it might not be legal to create new FSUBs.
if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))		if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
return 0;		return 0;

▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,

switch (Op.getOpcode()) {		switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");		default: llvm_unreachable("Unknown code");
case ISD::ConstantFP: {		case ISD::ConstantFP: {
APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();		APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
V.changeSign();		V.changeSign();
return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());		return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}		}
		case ISD::BUILD_VECTOR: {
		SmallVector<SDValue, 4> Ops;
		for (SDValue C : Op->op_values()) {
		if (C.isUndef()) {
		Ops.push_back(C);
		continue;
		}
		APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
		V.changeSign();
		Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
		}
		return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
		}
case ISD::FADD:		case ISD::FADD:
assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());		assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)		// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,		if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, ForCodeSize,		DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
Depth+1))		Depth + 1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),		return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,		GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, ForCodeSize,		LegalOperations, ForCodeSize,
Depth+1),		Depth + 1),
Op.getOperand(1), Flags);		Op.getOperand(1), Flags);
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)		// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),		return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,		GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, ForCodeSize,		LegalOperations, ForCodeSize,
Depth+1),		Depth + 1),
Op.getOperand(0), Flags);		Op.getOperand(0), Flags);
case ISD::FSUB:		case ISD::FSUB:
// fold (fneg (fsub 0, B)) -> B		// fold (fneg (fsub 0, B)) -> B
if (auto *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))		if (ConstantFPSDNode *N0CFP =
		isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
if (N0CFP->isZero())		if (N0CFP->isZero())
return Op.getOperand(1);		return Op.getOperand(1);

// fold (fneg (fsub A, B)) -> (fsub B, A)		// fold (fneg (fsub A, B)) -> (fsub B, A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),		return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(0), Flags);		Op.getOperand(1), Op.getOperand(0), Flags);

case ISD::FMUL:		case ISD::FMUL:
case ISD::FDIV:		case ISD::FDIV:
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)		// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,		if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
DAG.getTargetLoweringInfo(), &Options, ForCodeSize,		DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
Depth+1))		Depth + 1))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),		return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,		GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, ForCodeSize,		LegalOperations, ForCodeSize,
Depth+1),		Depth + 1),
Op.getOperand(1), Flags);		Op.getOperand(1), Flags);

// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))		// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),		return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),		Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,		GetNegatedExpression(Op.getOperand(1), DAG,
LegalOperations, ForCodeSize,		LegalOperations, ForCodeSize,
Depth+1), Flags);		Depth + 1), Flags);

case ISD::FP_EXTEND:		case ISD::FP_EXTEND:
case ISD::FSIN:		case ISD::FSIN:
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),		return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,		GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, ForCodeSize,		LegalOperations, ForCodeSize,
Depth+1));		Depth + 1));
case ISD::FP_ROUND:		case ISD::FP_ROUND:
return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),		return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,		GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, ForCodeSize,		LegalOperations, ForCodeSize,
Depth+1),		Depth + 1),
Op.getOperand(1));		Op.getOperand(1));
}		}
}		}

// APInts must be the same size for most operations, this helper		// APInts must be the same size for most operations, this helper
// function zero extends the shorter of the pair so that they match.		// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.		// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {		static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
▲ Show 20 Lines • Show All 19,643 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/AMDGPU/fneg-fabs.f16.ll

Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines	define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x half> %in) {
%fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)		%fabs = call <4 x half> @llvm.fabs.v4f16(<4 x half> %in)
%fsub = fsub <4 x half> <half -0.0, half -0.0, half -0.0, half -0.0>, %fabs		%fsub = fsub <4 x half> <half -0.0, half -0.0, half -0.0, half -0.0>, %fabs
store <4 x half> %fsub, <4 x half> addrspace(1)* %out		store <4 x half> %fsub, <4 x half> addrspace(1)* %out
ret void		ret void
}		}

; GCN-LABEL: {{^}}fold_user_fneg_fabs_v2f16:		; GCN-LABEL: {{^}}fold_user_fneg_fabs_v2f16:
; CI: s_load_dword [[IN:s[0-9]+]]		; CI: s_load_dword [[IN:s[0-9]+]]
; CI: s_or_b32 [[FNEG_FABS:s[0-9]+]], [[IN]], 0x80008000
; CI: s_lshr_b32		; CI: s_lshr_b32
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}		; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
; CI: v_cvt_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}		; CI: v_cvt_f32_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}|
; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}		; CI: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}}
; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}}		; CI: v_mul_f32_e32 v{{[0-9]+}}, -4.0, v{{[0-9]+}}

; VI: v_mul_f16_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|, 4.0		; VI: v_mul_f16_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, -4.0
; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD		; VI: v_mul_f16_sdwa v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD

; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff		; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]		; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0]
define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 {		define amdgpu_kernel void @fold_user_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) #0 {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)		%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs		%fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
%mul = fmul <2 x half> %fneg.fabs, <half 4.0, half 4.0>		%mul = fmul <2 x half> %fneg.fabs, <half 4.0, half 4.0>
store <2 x half> %mul, <2 x half> addrspace(1)* %out		store <2 x half> %mul, <2 x half> addrspace(1)* %out
ret void		ret void
}		}

Show All 9 Lines	define amdgpu_kernel void @s_fneg_multi_use_fabs_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {
%fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs		%fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs
store <2 x half> %fabs, <2 x half> addrspace(1)* %out0		store <2 x half> %fabs, <2 x half> addrspace(1)* %out0
store <2 x half> %fneg, <2 x half> addrspace(1)* %out1		store <2 x half> %fneg, <2 x half> addrspace(1)* %out1
ret void		ret void
}		}

; GCN-LABEL: {{^}}s_fneg_multi_use_fabs_foldable_neg_v2f16:		; GCN-LABEL: {{^}}s_fneg_multi_use_fabs_foldable_neg_v2f16:
; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff		; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff
; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]		; GFX9: v_pk_mul_f16 v{{[0-9]+}}, [[ABS]], -4.0 op_sel_hi:[1,0]
define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {		define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(<2 x half> addrspace(1)* %out0, <2 x half> addrspace(1)* %out1, <2 x half> %in) {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)		%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs		%fneg = fsub <2 x half> <half -0.0, half -0.0>, %fabs
%mul = fmul <2 x half> %fneg, <half 4.0, half 4.0>		%mul = fmul <2 x half> %fneg, <half 4.0, half 4.0>
store <2 x half> %fabs, <2 x half> addrspace(1)* %out0		store <2 x half> %fabs, <2 x half> addrspace(1)* %out0
store <2 x half> %mul, <2 x half> addrspace(1)* %out1		store <2 x half> %mul, <2 x half> addrspace(1)* %out1
ret void		ret void
}		}

declare half @llvm.fabs.f16(half) #1		declare half @llvm.fabs.f16(half) #1
declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1		declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1		declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1

attributes #0 = { nounwind }		attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }		attributes #1 = { nounwind readnone }

llvm/trunk/test/CodeGen/X86/fma_patterns.ll

Show First 20 Lines • Show All 1,157 Lines • ▼ Show 20 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <4 x float> <float -1.0, float -1.0, float undef, float -1.0>, %x		%s = fsub <4 x float> <float -1.0, float -1.0, float undef, float -1.0>, %x
%m = fmul <4 x float> %y, %s		%m = fmul <4 x float> %y, %s
ret <4 x float> %m		ret <4 x float> %m
}		}

define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {		define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y:		; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0		; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y:		; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0		; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y:		; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0		; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0		; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:		; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1		; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
;		;
Show All 9 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>		%s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
%m = fmul <4 x float> %s, %y		%m = fmul <4 x float> %s, %y
ret <4 x float> %m		ret <4 x float> %m
}		}

define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {		define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one:		; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one:		; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one:		; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0		; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:		; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1		; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
;		;
Show All 9 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>		%s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
%m = fmul <4 x float> %y, %s		%m = fmul <4 x float> %y, %s
ret <4 x float> %m		ret <4 x float> %m
}		}

define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) {		define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:		; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:		; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:		; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0		; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:		; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1		; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
;		;
Show All 9 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float undef>		%s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float undef>
%m = fmul <4 x float> %y, %s		%m = fmul <4 x float> %y, %s
ret <4 x float> %m		ret <4 x float> %m
}		}

define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {		define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:		; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0		; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:		; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0		; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:		; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0		; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0		; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:		; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1		; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
;		;
Show All 9 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>		%s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
%m = fmul <4 x float> %s, %y		%m = fmul <4 x float> %s, %y
ret <4 x float> %m		ret <4 x float> %m
}		}

define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {		define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:		; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:		; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:		; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0		; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:		; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1		; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
;		;
Show All 9 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>		%s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
%m = fmul <4 x float> %y, %s		%m = fmul <4 x float> %y, %s
ret <4 x float> %m		ret <4 x float> %m
}		}

define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) {		define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:		; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:		; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0		; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:		; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to4}, %xmm0, %xmm0		; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0		; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:		; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1		; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
;		;
▲ Show 20 Lines • Show All 725 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll

Show First 20 Lines • Show All 671 Lines • ▼ Show 20 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x		%s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
%m = fmul <8 x double> %y, %s		%m = fmul <8 x double> %y, %s
ret <8 x double> %m		ret <8 x double> %m
}		}

define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {		define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:		; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]		; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1		; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0		; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0		; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1		; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:		; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]		; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1		; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0		; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0		; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1		; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y:		; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0		; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0		; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:		; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2		; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3		; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
Show All 11 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>		%s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
%m = fmul <16 x float> %s, %y		%m = fmul <16 x float> %s, %y
ret <16 x float> %m		ret <16 x float> %m
}		}

define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {		define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:		; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]		; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1		; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0		; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0		; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1		; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:		; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]		; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1		; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0		; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0		; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1		; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one:		; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0		; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0		; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:		; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2		; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm2
; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3		; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) - ymm3
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
Show All 11 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>		%s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
%m = fmul <8 x double> %y, %s		%m = fmul <8 x double> %y, %s
ret <8 x double> %m		ret <8 x double> %m
}		}

define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {		define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:		; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]		; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1		; FMA-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0		; FMA-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0		; FMA-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1		; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:		; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]		; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm1, %ymm1		; FMA4-INFS-NEXT: vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vsubps %ymm4, %ymm0, %ymm0		; FMA4-INFS-NEXT: vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0		; FMA4-INFS-NEXT: vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1		; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:		; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0		; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0		; AVX512-INFS-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:		; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2		; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3		; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
Show All 11 Lines	; AVX512-NOINFS-NEXT: retq
%s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>		%s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
%m = fmul <16 x float> %s, %y		%m = fmul <16 x float> %s, %y
ret <16 x float> %m		ret <16 x float> %m
}		}

define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {		define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:		; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA-INFS: # %bb.0:		; FMA-INFS: # %bb.0:
; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]		; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1		; FMA-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0		; FMA-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0		; FMA-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1		; FMA-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT: retq		; FMA-INFS-NEXT: retq
;		;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:		; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-INFS: # %bb.0:		; FMA4-INFS: # %bb.0:
; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]		; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm1, %ymm1		; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT: vsubpd %ymm4, %ymm0, %ymm0		; FMA4-INFS-NEXT: vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0		; FMA4-INFS-NEXT: vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1		; FMA4-INFS-NEXT: vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT: retq		; FMA4-INFS-NEXT: retq
;		;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:		; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512-INFS: # %bb.0:		; AVX512-INFS: # %bb.0:
; AVX512-INFS-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0		; AVX512-INFS-NEXT: vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0		; AVX512-INFS-NEXT: vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT: retq		; AVX512-INFS-NEXT: retq
;		;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:		; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA-NOINFS: # %bb.0:		; FMA-NOINFS: # %bb.0:
; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2		; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm2
; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3		; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) + ymm3
; FMA-NOINFS-NEXT: retq		; FMA-NOINFS-NEXT: retq
▲ Show 20 Lines • Show All 545 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/fp-fold.ll

Show First 20 Lines • Show All 118 Lines • ▼ Show 20 Lines	; ANY-NEXT: retq
%r = fsub nsz reassoc float %y, %add		%r = fsub nsz reassoc float %y, %add
ret float %r		ret float %r
}		}

define <4 x float> @fsub_neg_y_vector(<4 x float> %x, <4 x float> %y) {		define <4 x float> @fsub_neg_y_vector(<4 x float> %x, <4 x float> %y) {
; ANY-LABEL: fsub_neg_y_vector:		; ANY-LABEL: fsub_neg_y_vector:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: mulps {{.*}}(%rip), %xmm0		; ANY-NEXT: mulps {{.*}}(%rip), %xmm0
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
; ANY-NEXT: retq		; ANY-NEXT: retq
%mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>		%mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>
%add = fadd <4 x float> %mul, %y		%add = fadd <4 x float> %mul, %y
%r = fsub nsz reassoc <4 x float> %y, %add		%r = fsub nsz reassoc <4 x float> %y, %add
ret <4 x float> %r		ret <4 x float> %r
}		}

define <4 x float> @fsub_neg_y_vector_nonuniform(<4 x float> %x, <4 x float> %y) {		define <4 x float> @fsub_neg_y_vector_nonuniform(<4 x float> %x, <4 x float> %y) {
; ANY-LABEL: fsub_neg_y_vector_nonuniform:		; ANY-LABEL: fsub_neg_y_vector_nonuniform:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: mulps {{.*}}(%rip), %xmm0		; ANY-NEXT: mulps {{.*}}(%rip), %xmm0
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
; ANY-NEXT: retq		; ANY-NEXT: retq
%mul = fmul <4 x float> %x, <float 5.0, float 6.0, float 7.0, float 8.0>		%mul = fmul <4 x float> %x, <float 5.0, float 6.0, float 7.0, float 8.0>
%add = fadd <4 x float> %mul, %y		%add = fadd <4 x float> %mul, %y
%r = fsub nsz reassoc <4 x float> %y, %add		%r = fsub nsz reassoc <4 x float> %y, %add
ret <4 x float> %r		ret <4 x float> %r
}		}

define float @fsub_neg_y_commute(float %x, float %y) {		define float @fsub_neg_y_commute(float %x, float %y) {
; ANY-LABEL: fsub_neg_y_commute:		; ANY-LABEL: fsub_neg_y_commute:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: mulss {{.*}}(%rip), %xmm0		; ANY-NEXT: mulss {{.*}}(%rip), %xmm0
; ANY-NEXT: retq		; ANY-NEXT: retq
%mul = fmul float %x, 5.0		%mul = fmul float %x, 5.0
%add = fadd float %y, %mul		%add = fadd float %y, %mul
%r = fsub nsz reassoc float %y, %add		%r = fsub nsz reassoc float %y, %add
ret float %r		ret float %r
}		}

define <4 x float> @fsub_neg_y_commute_vector(<4 x float> %x, <4 x float> %y) {		define <4 x float> @fsub_neg_y_commute_vector(<4 x float> %x, <4 x float> %y) {
; ANY-LABEL: fsub_neg_y_commute_vector:		; ANY-LABEL: fsub_neg_y_commute_vector:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: mulps {{.*}}(%rip), %xmm0		; ANY-NEXT: mulps {{.*}}(%rip), %xmm0
; ANY-NEXT: xorps {{.*}}(%rip), %xmm0
; ANY-NEXT: retq		; ANY-NEXT: retq
%mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>		%mul = fmul <4 x float> %x, <float 5.0, float 5.0, float 5.0, float 5.0>
%add = fadd <4 x float> %y, %mul		%add = fadd <4 x float> %y, %mul
%r = fsub nsz reassoc <4 x float> %y, %add		%r = fsub nsz reassoc <4 x float> %y, %add
ret <4 x float> %r		ret <4 x float> %r
}		}

; Y - (X + Y) --> -X		; Y - (X + Y) --> -X
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines
; UNSAFE-NEXT: retq		; UNSAFE-NEXT: retq
%r = fsub float %x, -0.0		%r = fsub float %x, -0.0
ret float %r		ret float %r
}		}

define <4 x float> @fsub_negzero_vector(<4 x float> %x) {		define <4 x float> @fsub_negzero_vector(<4 x float> %x) {
; STRICT-LABEL: fsub_negzero_vector:		; STRICT-LABEL: fsub_negzero_vector:
; STRICT: # %bb.0:		; STRICT: # %bb.0:
; STRICT-NEXT: subps {{.*}}(%rip), %xmm0		; STRICT-NEXT: xorps %xmm1, %xmm1
		; STRICT-NEXT: addps %xmm1, %xmm0
; STRICT-NEXT: retq		; STRICT-NEXT: retq
;		;
; UNSAFE-LABEL: fsub_negzero_vector:		; UNSAFE-LABEL: fsub_negzero_vector:
; UNSAFE: # %bb.0:		; UNSAFE: # %bb.0:
; UNSAFE-NEXT: retq		; UNSAFE-NEXT: retq
%r = fsub <4 x float> %x, <float -0.0, float -0.0, float -0.0, float -0.0>		%r = fsub <4 x float> %x, <float -0.0, float -0.0, float -0.0, float -0.0>
ret <4 x float> %r		ret <4 x float> %r
}		}
▲ Show 20 Lines • Show All 52 Lines • Show Last 20 Lines