Diff 374158

llvm/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

	Show First 20 Lines • Show All 32,713 Lines • ▼ Show 20 Lines
	SDValue RHS = N->getOperand(1);			SDValue RHS = N->getOperand(1);
	int CombineOpcode =			int CombineOpcode =
	N->getOpcode() == X86ISD::VFCMULC ? X86ISD::VFMULC : X86ISD::VFCMULC;			N->getOpcode() == X86ISD::VFCMULC ? X86ISD::VFMULC : X86ISD::VFCMULC;
	auto isConjugationConstant = [](const Constant *c) {			auto isConjugationConstant = [](const Constant *c) {
	if (const auto *CI = dyn_cast<ConstantInt>(c)) {			if (const auto *CI = dyn_cast<ConstantInt>(c)) {
	APInt ConjugationInt32 = APInt(32, 0x80000000, true);			APInt ConjugationInt32 = APInt(32, 0x80000000, true);
	APInt ConjugationInt64 = APInt(64, 0x8000000080000000ULL, true);			APInt ConjugationInt64 = APInt(64, 0x8000000080000000ULL, true);
	switch (CI->getBitWidth()) {			switch (CI->getBitWidth()) {
	case 16:			case 16:
				xbolva00Unsubmitted Not Done Reply Inline Actions Do we really need this output here? Simplify it a bit? Something like you wrote "Combine the FADD(A, FMA(B, C, 0)) to FMA(B, C, A)"? xbolva00: Do we really need this output here? Simplify it a bit? Something like you wrote "Combine the…
				LiuChen3AuthorUnsubmitted Done Reply Inline Actions Good idea. LiuChen3: Good idea.
	return false;			return false;
	case 32:			case 32:
	return CI->getValue() == ConjugationInt32;			return CI->getValue() == ConjugationInt32;
	case 64:			case 64:
	return CI->getValue() == ConjugationInt64;			return CI->getValue() == ConjugationInt64;
	default:			default:
	llvm_unreachable("Unexpected bit width");			llvm_unreachable("Unexpected bit width");
	}			}
	Show All 14 Lines
	ConstantPoolSDNode *CP =			ConstantPoolSDNode *CP =
	dyn_cast<ConstantPoolSDNode>(XORRHS.getOperand(1).getOperand(0));			dyn_cast<ConstantPoolSDNode>(XORRHS.getOperand(1).getOperand(0));
	if (CP && isConjugationConstant(CP->getConstVal())) {			if (CP && isConjugationConstant(CP->getConstVal())) {
	SelectionDAG::FlagInserter FlagsInserter(DAG, N);			SelectionDAG::FlagInserter FlagsInserter(DAG, N);
	SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0));			SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0));
	SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F);			SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F);
	r = DAG.getBitcast(VT, FCMulC);			r = DAG.getBitcast(VT, FCMulC);
	return true;			return true;
	}			}
				pengfeiUnsubmitted Not Done Reply Inline Actions We don't need else after return. See the Lint comment. pengfei: We don't need else after return. See the Lint comment.
	}			}
	}			}
				pengfeiUnsubmitted Not Done Reply Inline Actions indent pengfei: indent
	}			}
	return false;			return false;
	};			};
				pengfeiUnsubmitted Not Done Reply Inline Actions Why do we still need this? pengfei: Why do we still need this?
				LiuChen3AuthorUnsubmitted Done Reply Inline Actions We need to transform FMA(A, B, 0) into MUL(A, B) first. LiuChen3: We need to transform FMA(A, B, 0) into MUL(A, B) first.
				LiuChen3AuthorUnsubmitted Done Reply Inline Actions My bad. I see what you mean. LiuChen3: My bad. I see what you mean.
	SDValue Res;			SDValue Res;
				pengfeiUnsubmitted Not Done Reply Inline Actions Can these be MulOp0 = Op0->getOperand(1); MulOp1 = Op0->getOperand(2); pengfei: Can these be ``` MulOp0 = Op0->getOperand(1); MulOp1 = Op0->getOperand(2); ```
	if (combineConjugation(Res))			if (combineConjugation(Res))
	return Res;			return Res;
	std::swap(LHS, RHS);			std::swap(LHS, RHS);
	if (combineConjugation(Res))			if (combineConjugation(Res))
				pengfeiUnsubmitted Not Done Reply Inline Actions I think we can use `bool IsConj`, `SDValue MulOp0, MulOp1` instead of `CFmul`. Then you don't need to create a temp mul node. pengfei: I think we can use `bool IsConj`, `SDValue MulOp0, MulOp1` instead of `CFmul`. Then you don't…
				pengfeiUnsubmitted Not Done Reply Inline Actions I think we can then use if ((Opcode == X86ISD::VFMULC \|\| Opcode == X86ISD::VFCMULC)) { ... return true; } if ((Opcode == X86ISD::VFMADDC \|\| Opcode == X86ISD::VFCMADDC) ... { ... return true; } return false; pengfei: I think we can then use ``` if ((Opcode == X86ISD::VFMULC \|\| Opcode == X86ISD::VFCMULC)) { ...
				LiuChen3AuthorUnsubmitted Done Reply Inline Actions It seems we create more temp nodes. Is that better? LiuChen3: It seems we create more temp nodes. Is that better?
				pengfeiUnsubmitted Not Done Reply Inline Actions They are temp variables rather than nodes. And the compiler will likely optimize them away. pengfei: They are temp variables rather than nodes. And the compiler will likely optimize them away.
	return Res;			return Res;
	return Res;			return Res;
	}			}

	// Try to combine the following nodes			// This function transforms complex FADD(A, FMA(B, C, 0)) and FADD(A, FMUL(B, C))
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// This function transforms complex FADD(A, FMA(B, C, 0)) and FADD(A, FMUL(B, C)) -// to FMA(B, C, A). +// This function transforms complex FADD(A, FMA(B, C, 0)) and FADD(A, FMUL(B, +// C)) to FMA(B, C, A). Lint: Pre-merge checks: clang-format: please reformat the code ``` -// This function transforms complex FADD(A, FMA(B…
	// t21: v16f32 = X86ISD::VFMULC/VFCMULC t7, t8			// to FMA(B, C, A).
	// t15: v32f16 = bitcast t21
	// t16: v32f16 = fadd nnan ninf nsz arcp contract afn reassoc t15, t2
	// into X86ISD::VFMADDC/VFCMADDC if possible:
	// t22: v16f32 = bitcast t2
	// t23: v16f32 = nnan ninf nsz arcp contract afn reassoc
	// X86ISD::VFMADDC/VFCMADDC t7, t8, t22
	// t24: v32f16 = bitcast t23
	static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,			static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
	const X86Subtarget &Subtarget) {			const X86Subtarget &Subtarget) {
				pengfeiUnsubmitted Not Done Reply Inline Actions Better to add parentheses. pengfei: Better to add parentheses.
	auto AllowContract = [&DAG](SDNode *N) {			// Check the -ffp-contract option or the fast-math flag of SDNode.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Check the -ffp-contract option or the fast-math flag of SDNode. -auto AllowContract = [&DAG](const SDNodeFlags &Flags) { - return DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast \|\| - (Flags.hasNoNaNs() && Flags.hasNoInfs() && Flags.hasNoSignedZeros() && - Flags.hasAllowReciprocal() && Flags.hasAllowContract() && - Flags.hasApproximateFuncs() && Flags.hasAllowReassociation()); -}; + // Check the -ffp-contract option or the fast-math flag of SDNode. + auto AllowContract = [&DAG](const SDNodeFlags &Flags) { + return DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast \|\| 5 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Check the -ffp-contract option or the fast-math…
	return DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast \|\|			auto AllowContract = [&DAG](const SDNodeFlags &Flags) {
	N->getFlags().hasAllowContract();			return DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast \|\|
				LiuChen3AuthorUnsubmitted Done Reply Inline Actions Maybe we can just check hasNoSignedZeros() and hasAllowContract() as pengfei said? LiuChen3: Maybe we can just check hasNoSignedZeros() and hasAllowContract() as pengfei said?
				pengfeiUnsubmitted Not Done Reply Inline Actions Yeah, I prefer checking both in line 47582. pengfei: Yeah, I prefer checking both in line 47582.
	};			(Flags.hasNoNaNs() && Flags.hasNoInfs() && Flags.hasNoSignedZeros() &&
				pengfeiUnsubmitted Not Done Reply Inline Actions I think we can remove the assert now. pengfei: I think we can remove the assert now.
	if (N->getOpcode() != ISD::FADD \|\| !Subtarget.hasFP16() \|\| !AllowContract(N))			Flags.hasAllowReciprocal() && Flags.hasAllowContract() &&
	return SDValue();			Flags.hasApproximateFuncs() && Flags.hasAllowReassociation());
				};

	EVT VT = N->getValueType(0);			auto IsVectorAllNegativeZero = [](const SDNode *N) {
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -auto IsVectorAllNegativeZero = [](const SDNode N) { - if (N->getOpcode() != X86ISD::VBROADCAST_LOAD) + auto IsVectorAllNegativeZero = [](const SDNode N) { + if (N->getOpcode() != X86ISD::VBROADCAST_LOAD) + return false; + assert(N->getSimpleValueType(0).getScalarType() == MVT::f32 && + "Unexpected vector type!"); + if (ConstantPoolSDNode CP = + dyn_cast<ConstantPoolSDNode>(N->getOperand(1)->getOperand(0))) { + APInt AI = APInt(32, 0x80008000, true); 5 diff lines are omitted. See full path. Lint: Pre-merge checks:* clang-format: please reformat the code ``` -auto IsVectorAllNegativeZero = [](const SDNode *N)…
	if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16)			if (N->getOpcode() != X86ISD::VBROADCAST_LOAD)
	return SDValue();			return false;
				assert(N->getSimpleValueType(0).getScalarType() == MVT::f32 &&
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - assert(N->getSimpleValueType(0).getScalarType() == MVT::f32 && - "Unexpected vector type!"); - if (ConstantPoolSDNode CP = - dyn_cast<ConstantPoolSDNode>(N->getOperand(1)->getOperand(0))) { - APInt AI = APInt(32, 0x80008000, true); - if (const auto CI = dyn_cast<ConstantInt>(CP->getConstVal())) - return CI->getValue() == AI; - if (const auto CF = dyn_cast<ConstantFP>(CP->getConstVal())) - return CF->getValue() == APFloat(APFloat::IEEEsingle(), AI); - } 3 diff lines are omitted. See full path. Lint: Pre-merge checks:* clang-format: please reformat the code ``` - assert(N->getSimpleValueType(0).getScalarType()…
				"Unexpected vector type!");
				if (ConstantPoolSDNode *CP =
				dyn_cast<ConstantPoolSDNode>(N->getOperand(1)->getOperand(0))) {
				APInt AI = APInt(32, 0x80008000, true);
				if (const auto *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
				return CI->getValue() == AI;
				if (const auto *CF = dyn_cast<ConstantFP>(CP->getConstVal()))
				return CF->getValue() == APFloat(APFloat::IEEEsingle(), AI);
				}
				return false;
				};

	SDValue LHS = N->getOperand(0);			if (N->getOpcode() != ISD::FADD \|\| !Subtarget.hasFP16() \|\|
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -if (N->getOpcode() != ISD::FADD \|\| !Subtarget.hasFP16() \|\| - !AllowContract(N->getFlags())) - return SDValue(); + if (N->getOpcode() != ISD::FADD \|\| !Subtarget.hasFP16() \|\| + !AllowContract(N->getFlags())) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -if (N->getOpcode() != ISD::FADD \|\| !Subtarget.
	SDValue RHS = N->getOperand(1);			!AllowContract(N->getFlags()))
	SDValue CFmul, FAddOp1;			return SDValue();
	auto GetCFmulFrom = [&CFmul, &AllowContract](SDValue N) -> bool {
	if (!N.hasOneUse() \|\| N.getOpcode() != ISD::BITCAST)
	return false;
	SDValue Op0 = N.getOperand(0);
	unsigned Opcode = Op0.getOpcode();
	if (Op0.hasOneUse() && AllowContract(Op0.getNode()) &&
	(Opcode == X86ISD::VFMULC \|\| Opcode == X86ISD::VFCMULC))
	CFmul = Op0;
	return !!CFmul;
	};

	if (GetCFmulFrom(LHS))			EVT VT = N->getValueType(0);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -EVT VT = N->getValueType(0); -if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16) - return SDValue(); + EVT VT = N->getValueType(0); + if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -EVT VT = N->getValueType(0); -if (VT != MVT::v8f16…
	FAddOp1 = RHS;			if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16)
	else if (GetCFmulFrom(RHS))			return SDValue();
	FAddOp1 = LHS;
	else			SDValue LHS = N->getOperand(0);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -SDValue LHS = N->getOperand(0); -SDValue RHS = N->getOperand(1); -bool IsConj; -SDValue FAddOp1, MulOp0, MulOp1; -auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &AllowContract, - &IsVectorAllNegativeZero](SDValue N) -> bool { - if (!N.hasOneUse() \|\| N.getOpcode() != ISD::BITCAST) - return false; - SDValue Op0 = N.getOperand(0); - unsigned Opcode = Op0.getOpcode(); 43 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -SDValue LHS = N->getOperand(0); -SDValue RHS = N…
	return SDValue();			SDValue RHS = N->getOperand(1);
				bool IsConj;
				SDValue FAddOp1, MulOp0, MulOp1;
				auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &AllowContract,
				&IsVectorAllNegativeZero](SDValue N) -> bool {
				if (!N.hasOneUse() \|\| N.getOpcode() != ISD::BITCAST)
				return false;
				SDValue Op0 = N.getOperand(0);
				unsigned Opcode = Op0.getOpcode();
				if (Op0.hasOneUse()) {
				if ((Opcode == X86ISD::VFMULC \|\| Opcode == X86ISD::VFCMULC) &&
				AllowContract(Op0->getFlags())) {
				MulOp0 = Op0.getOperand(0);
				MulOp1 = Op0.getOperand(1);
				IsConj = Opcode == X86ISD::VFCMULC;
				return true;
				}
				if ((Opcode == X86ISD::VFMADDC \|\| Opcode == X86ISD::VFCMADDC) &&
				((ISD::isBuildVectorAllZeros(Op0->getOperand(0).getNode()) &&
				pengfeiUnsubmitted Not Done Reply Inline Actions Should this be AllowContract(Op0->getFlags()) && (ISD::isBuildVectorAllZeros(Op0->getOperand(0).getNode()) && Op0->getFlags().hasNoSignedZeros()) \|\| IsVectorAllNegativeZero(Op0->getOperand(0).getNode())) I.e, check `AllowContract` together with `IsVectorAllNegativeZero` as well. pengfei: Should this be ``` AllowContract(Op0->getFlags()) && (ISD::isBuildVectorAllZeros(Op0…
				LiuChen3AuthorUnsubmitted Done Reply Inline Actions AllowContract will check hasNoSignedZeros(). It seems that we can only do this combination when the fast-math flag is set, no matter whether the third operand is +0.0 or -0.0. +0.0 or -0.0 affects the conversion of FMA(a, b, ±0.0) to FMUL(a, b). LiuChen3: AllowContract will check hasNoSignedZeros(). It seems that we can only do this combination when…
				AllowContract(Op0->getFlags())) \|\|
				IsVectorAllNegativeZero(Op0->getOperand(0).getNode()))) {
				MulOp0 = Op0.getOperand(1);
				MulOp1 = Op0.getOperand(2);
				IsConj = Opcode == X86ISD::VFCMADDC;
				return true;
				}
				}
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - } - return false; -}; + return false; + }; Lint: Pre-merge checks: clang-format: please reformat the code ``` - } - return false; -}; + return false; + }…
				return false;
				};

				if (GetCFmulFrom(LHS))
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -if (GetCFmulFrom(LHS)) - FAddOp1 = RHS; -else if (GetCFmulFrom(RHS)) - FAddOp1 = LHS; -else - return SDValue(); + if (GetCFmulFrom(LHS)) + FAddOp1 = RHS; + else if (GetCFmulFrom(RHS)) + FAddOp1 = LHS; 2 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -if (GetCFmulFrom(LHS)) - FAddOp1 = RHS; -else if…
				FAddOp1 = RHS;
				else if (GetCFmulFrom(RHS))
				FAddOp1 = LHS;
				else
				return SDValue();

	MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2);			MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2); -FAddOp1 = DAG.getBitcast(CVT, FAddOp1); -unsigned NewOp = IsConj ? X86ISD::VFCMADDC : X86ISD::VFMADDC; -// FIXME: How do we handle when fast math flags of FADD are different from -// CFMUL's? -SDValue CFmul = - DAG.getNode(NewOp, SDLoc(N), CVT, FAddOp1, MulOp0, MulOp1, N->getFlags()); -return DAG.getBitcast(VT, CFmul); + MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2); + FAddOp1 = DAG.getBitcast(CVT, FAddOp1); 6 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -MVT CVT = MVT::getVectorVT(MVT::f32, VT.
	assert(CFmul->getValueType(0) == CVT && "Complex type mismatch");			FAddOp1 = DAG.getBitcast(CVT, FAddOp1);
	FAddOp1 = DAG.getBitcast(CVT, FAddOp1);			unsigned NewOp = IsConj ? X86ISD::VFCMADDC : X86ISD::VFMADDC;
	unsigned newOp = CFmul.getOpcode() == X86ISD::VFMULC ? X86ISD::VFMADDC			// FIXME: How do we handle when fast math flags of FADD are different from
	: X86ISD::VFCMADDC;			// CFMUL's?
	// FIXME: How do we handle when fast math flags of FADD are different from			SDValue CFmul =
	// CFMUL's?			DAG.getNode(NewOp, SDLoc(N), CVT, FAddOp1, MulOp0, MulOp1, N->getFlags());
	CFmul = DAG.getNode(newOp, SDLoc(N), CVT, FAddOp1, CFmul.getOperand(0),			return DAG.getBitcast(VT, CFmul);
	CFmul.getOperand(1), N->getFlags());
	return DAG.getBitcast(VT, CFmul);
	}			}

	/// Do target-specific dag combines on floating-point adds/subs.			/// Do target-specific dag combines on floating-point adds/subs.
	static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,			static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
	const X86Subtarget &Subtarget) {			const X86Subtarget &Subtarget) {
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - const X86Subtarget &Subtarget) { -if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget)) - return HOp; + const X86Subtarget &Subtarget) { + if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget)) + return HOp; Lint: Pre-merge checks: clang-format: please reformat the code ``` - const X86Subtarget…
	if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget))			if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget))
	return HOp;			return HOp;

	if (SDValue COp = combineFaddCFmul(N, DAG, Subtarget))			if (SDValue COp = combineFaddCFmul(N, DAG, Subtarget))
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -if (SDValue COp = combineFaddCFmul(N, DAG, Subtarget)) - return COp; + if (SDValue COp = combineFaddCFmul(N, DAG, Subtarget)) + return COp; Lint: Pre-merge checks: clang-format: please reformat the code ``` -if (SDValue COp = combineFaddCFmul(N, DAG…
	return COp;			return COp;

	return SDValue();			return SDValue();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -return SDValue(); + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -return SDValue(); + return SDValue(); ```
	}			}
				pengfeiUnsubmitted Not Done Reply Inline Actions This seems to have been changed unintentionally. pengfei: This seems to have been changed unintentionally.
				LiuChen3AuthorUnsubmitted Done Reply Inline Actions Sorry for this. Looks like I accidentally made some changes here. LiuChen3: Sorry for this. Looks like I accidentally made some changes here.

	/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify			/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
	/// the codegen.			/// the codegen.
	/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )			/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
	/// TODO: This overlaps with the generic combiner's visitTRUNCATE. Remove			/// TODO: This overlaps with the generic combiner's visitTRUNCATE. Remove
	/// anything that is guaranteed to be transformed by DAGCombiner.			/// anything that is guaranteed to be transformed by DAGCombiner.
	static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,			static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
	const X86Subtarget &Subtarget,			const X86Subtarget &Subtarget,
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - const X86Subtarget &Subtarget, - const SDLoc &DL) { -assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode"); -SDValue Src = N->getOperand(0); -unsigned SrcOpcode = Src.getOpcode(); -const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const X86Subtarget &Subtarget, + const SDLoc &DL) { + assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode"); + SDValue Src = N->getOperand(0); 2 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` - const…
	const SDLoc &DL) {			const SDLoc &DL) {
	assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");			assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
	SDValue Src = N->getOperand(0);			SDValue Src = N->getOperand(0);
	unsigned SrcOpcode = Src.getOpcode();			unsigned SrcOpcode = Src.getOpcode();
	const TargetLowering &TLI = DAG.getTargetLoweringInfo();			const TargetLowering &TLI = DAG.getTargetLoweringInfo();
				pengfeiUnsubmitted Not Done Reply Inline Actions The indentation is wrong too. The same below. pengfei: The indentation is wrong too. The same below.

	EVT VT = N->getValueType(0);
	EVT SrcVT = Src.getValueType();

	auto IsFreeTruncation = [VT](SDValue Op) {
	unsigned TruncSizeInBits = VT.getScalarSizeInBits();

	// See if this has been extended from a smaller/equal size to			EVT VT = N->getValueType(0);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -EVT VT = N->getValueType(0); -EVT SrcVT = Src.getValueType(); + EVT VT = N->getValueType(0); + EVT SrcVT = Src.getValueType(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -EVT VT = N->getValueType(0); -EVT SrcVT = Src.
	// the truncation size, allowing a truncation to combine with the extend.			EVT SrcVT = Src.getValueType();
	unsigned Opcode = Op.getOpcode();
	if ((Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND \|\|
	Opcode == ISD::ZERO_EXTEND) &&
	Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
	return true;

	// See if this is a single use constant which can be constant folded.			auto IsFreeTruncation = [VT](SDValue Op) {
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -auto IsFreeTruncation = [VT](SDValue Op) { - unsigned TruncSizeInBits = VT.getScalarSizeInBits(); + auto IsFreeTruncation = [VT](SDValue Op) { + unsigned TruncSizeInBits = VT.getScalarSizeInBits(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -auto IsFreeTruncation = [VT](SDValue Op) {…
	// NOTE: We don't peek throught bitcasts here because there is currently			unsigned TruncSizeInBits = VT.getScalarSizeInBits();
	// no support for constant folding truncate+bitcast+vector_of_constants. So
	// we'll just send up with a truncate on both operands which will
	// get turned back into (truncate (binop)) causing an infinite loop.
	return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
	};

	auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {			// See if this has been extended from a smaller/equal size to
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - // See if this has been extended from a smaller/equal size to - // the truncation size, allowing a truncation to combine with the extend. - unsigned Opcode = Op.getOpcode(); - if ((Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND \|\| - Opcode == ISD::ZERO_EXTEND) && - Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits) - return true; + // See if this has been extended from a smaller/equal size to + // the truncation size, allowing a truncation to combine with the extend. + unsigned Opcode = Op.getOpcode(); 4 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` - // See if this has been extended from a…
	SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);			// the truncation size, allowing a truncation to combine with the extend.
	SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);			unsigned Opcode = Op.getOpcode();
	return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1);			if ((Opcode == ISD::ANY_EXTEND \|\| Opcode == ISD::SIGN_EXTEND \|\|
	};			Opcode == ISD::ZERO_EXTEND) &&
				Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
				return true;

	// Don't combine if the operation has other uses.			// See if this is a single use constant which can be constant folded.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - // See if this is a single use constant which can be constant folded. - // NOTE: We don't peek throught bitcasts here because there is currently - // no support for constant folding truncate+bitcast+vector_of_constants. So - // we'll just send up with a truncate on both operands which will - // get turned back into (truncate (binop)) causing an infinite loop. - return ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); -}; + // See if this is a single use constant which can be constant folded. + // NOTE: We don't peek throught bitcasts here because there is currently + // no support for constant folding truncate+bitcast+vector_of_constants. So 4 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` - // See if this is a single use constant which can…
	if (!Src.hasOneUse())			// NOTE: We don't peek throught bitcasts here because there is currently
	return SDValue();			// no support for constant folding truncate+bitcast+vector_of_constants. So
				// we'll just send up with a truncate on both operands which will
				// get turned back into (truncate (binop)) causing an infinite loop.
				return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
				};

	// Only support vector truncation for now.			auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -auto TruncateArithmetic = [&](SDValue N0, SDValue N1) { - SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0); - SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1); - return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1); -}; + auto TruncateArithmetic = [&](SDValue N0, SDValue N1) { + SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0); + SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1); + }; Lint: Pre-merge checks: clang-format: please reformat the code ``` -auto TruncateArithmetic = [&](SDValue N0, SDValue…
	// TODO: i64 scalar math would benefit as well.			SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
	if (!VT.isVector())			SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
	return SDValue();			return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1);
				};

	// In most cases its only worth pre-truncating if we're only facing the cost			// Don't combine if the operation has other uses.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Don't combine if the operation has other uses. -if (!Src.hasOneUse()) - return SDValue(); + // Don't combine if the operation has other uses. + if (!Src.hasOneUse()) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Don't combine if the operation has other uses.
	// of one truncation.			if (!Src.hasOneUse())
	// i.e. if one of the inputs will constant fold or the input is repeated.			return SDValue();
	switch (SrcOpcode) {
	case ISD::MUL:
	// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its
	// better to truncate if we have the chance.
	if (SrcVT.getScalarType() == MVT::i64 &&
	TLI.isOperationLegal(SrcOpcode, VT) &&
	!TLI.isOperationLegal(SrcOpcode, SrcVT))
	return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
	LLVM_FALLTHROUGH;
	case ISD::AND:
	case ISD::XOR:
	case ISD::OR:
	case ISD::ADD:
	case ISD::SUB: {
	SDValue Op0 = Src.getOperand(0);
	SDValue Op1 = Src.getOperand(1);
	if (TLI.isOperationLegal(SrcOpcode, VT) &&
	(Op0 == Op1 \|\| IsFreeTruncation(Op0) \|\| IsFreeTruncation(Op1)))
	return TruncateArithmetic(Op0, Op1);
	break;
	}
	}

				// Only support vector truncation for now.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Only support vector truncation for now. -// TODO: i64 scalar math would benefit as well. -if (!VT.isVector()) - return SDValue(); + // Only support vector truncation for now. + // TODO: i64 scalar math would benefit as well. + if (!VT.isVector()) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Only support vector truncation for now. -// TODO…
				// TODO: i64 scalar math would benefit as well.
				if (!VT.isVector())
	return SDValue();			return SDValue();

				// In most cases its only worth pre-truncating if we're only facing the cost
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// In most cases its only worth pre-truncating if we're only facing the cost -// of one truncation. -// i.e. if one of the inputs will constant fold or the input is repeated. -switch (SrcOpcode) { -case ISD::MUL: - // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its - // better to truncate if we have the chance. - if (SrcVT.getScalarType() == MVT::i64 && - TLI.isOperationLegal(SrcOpcode, VT) && - !TLI.isOperationLegal(SrcOpcode, SrcVT)) 44 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -// In most cases its only worth pre-truncating if…
				// of one truncation.
				// i.e. if one of the inputs will constant fold or the input is repeated.
				switch (SrcOpcode) {
				case ISD::MUL:
				// X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - its
				// better to truncate if we have the chance.
				if (SrcVT.getScalarType() == MVT::i64 &&
				TLI.isOperationLegal(SrcOpcode, VT) &&
				!TLI.isOperationLegal(SrcOpcode, SrcVT))
				return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
				LLVM_FALLTHROUGH;
				case ISD::AND:
				case ISD::XOR:
				case ISD::OR:
				case ISD::ADD:
				case ISD::SUB: {
				SDValue Op0 = Src.getOperand(0);
				SDValue Op1 = Src.getOperand(1);
				if (TLI.isOperationLegal(SrcOpcode, VT) &&
				(Op0 == Op1 \|\| IsFreeTruncation(Op0) \|\| IsFreeTruncation(Op1)))
				return TruncateArithmetic(Op0, Op1);
				break;
				}
				}

				return SDValue();
	}			}

	/// Truncate using ISD::AND mask and X86ISD::PACKUS.			/// Truncate using ISD::AND mask and X86ISD::PACKUS.
	/// e.g. trunc <8 x i32> X to <8 x i16> -->			/// e.g. trunc <8 x i32> X to <8 x i16> -->
	/// MaskX = X & 0xffff (clear high bits to prevent saturation)			/// MaskX = X & 0xffff (clear high bits to prevent saturation)
	/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)			/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
	static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL,			static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL,
	const X86Subtarget &Subtarget,			const X86Subtarget &Subtarget,
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { -SDValue In = N->getOperand(0); -EVT InVT = In.getValueType(); -EVT OutVT = N->getValueType(0); + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + SDValue In = N->getOperand(0); + EVT InVT = In.getValueType(); + EVT OutVT = N->getValueType(0); Lint: Pre-merge checks: clang-format: please reformat the code ``` - …
	SelectionDAG &DAG) {			SelectionDAG &DAG) {
	SDValue In = N->getOperand(0);			SDValue In = N->getOperand(0);
	EVT InVT = In.getValueType();			EVT InVT = In.getValueType();
	EVT OutVT = N->getValueType(0);			EVT OutVT = N->getValueType(0);

	APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),			APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(), - OutVT.getScalarSizeInBits()); -In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT)); -return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget); + APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(), + OutVT.getScalarSizeInBits()); + In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT)); + return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget); Lint: Pre-merge checks: clang-format: please reformat the code ``` -APInt Mask = APInt::getLowBitsSet(InVT.
	OutVT.getScalarSizeInBits());			OutVT.getScalarSizeInBits());
	In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));			In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
	return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget);			return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget);
	}			}

	/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
	/// The source is first sign-extended in-register from the output type and
	/// then passed to truncateVectorWithPACK.
	static SDValue combineVectorTruncationWithPACKSS(SDNode *N, const SDLoc &DL,
	                                                 const X86Subtarget &Subtarget,
	                                                 SelectionDAG &DAG) {
	  const EVT DstVT = N->getValueType(0);
	  SDValue Src = N->getOperand(0);
	  const EVT SrcVT = Src.getValueType();

	  // NOTE(review): presumably the in-register sign extension keeps PACKSS's
	  // signed saturation from altering the low bits -- confirm.
	  SDValue SignExtended = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SrcVT, Src,
	                                     DAG.getValueType(DstVT));
	  return truncateVectorWithPACK(X86ISD::PACKSS, DstVT, SignExtended, DL, DAG,
	                                Subtarget);
	}

	/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into			/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into
	/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type			/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type
	/// legalization the truncation will be translated into a BUILD_VECTOR with each			/// legalization the truncation will be translated into a BUILD_VECTOR with each
	/// element that is extracted from a vector and then truncated, and it is			/// element that is extracted from a vector and then truncated, and it is
	/// difficult to do this optimization based on them.			/// difficult to do this optimization based on them.
	static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,			static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
	const X86Subtarget &Subtarget) {			const X86Subtarget &Subtarget) {
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - const X86Subtarget &Subtarget) { -EVT OutVT = N->getValueType(0); -if (!OutVT.isVector()) - return SDValue(); + const X86Subtarget &Subtarget) { + EVT OutVT = N->getValueType(0); + if (!OutVT.isVector()) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` - const…
	EVT OutVT = N->getValueType(0);			EVT OutVT = N->getValueType(0);
	if (!OutVT.isVector())			if (!OutVT.isVector())
	return SDValue();			return SDValue();

	SDValue In = N->getOperand(0);			SDValue In = N->getOperand(0);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -SDValue In = N->getOperand(0); -if (!In.getValueType().isSimple()) - return SDValue(); + SDValue In = N->getOperand(0); + if (!In.getValueType().isSimple()) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -SDValue In = N->getOperand(0); -if (!In.
	if (!In.getValueType().isSimple())			if (!In.getValueType().isSimple())
	return SDValue();			return SDValue();

	EVT InVT = In.getValueType();			EVT InVT = In.getValueType();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -EVT InVT = In.getValueType(); -unsigned NumElems = OutVT.getVectorNumElements(); + EVT InVT = In.getValueType(); + unsigned NumElems = OutVT.getVectorNumElements(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -EVT InVT = In.getValueType(); -unsigned NumElems =…
	unsigned NumElems = OutVT.getVectorNumElements();			unsigned NumElems = OutVT.getVectorNumElements();

	// AVX512 provides fast truncate ops.			// AVX512 provides fast truncate ops.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// AVX512 provides fast truncate ops. -if (!Subtarget.hasSSE2() \|\| Subtarget.hasAVX512()) - return SDValue(); + // AVX512 provides fast truncate ops. + if (!Subtarget.hasSSE2() \|\| Subtarget.hasAVX512()) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// AVX512 provides fast truncate ops. -if (!
	if (!Subtarget.hasSSE2() \|\| Subtarget.hasAVX512())			if (!Subtarget.hasSSE2() \|\| Subtarget.hasAVX512())
	return SDValue();			return SDValue();

	EVT OutSVT = OutVT.getVectorElementType();			EVT OutSVT = OutVT.getVectorElementType();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -EVT OutSVT = OutVT.getVectorElementType(); -EVT InSVT = InVT.getVectorElementType(); -if (!((InSVT == MVT::i16 \|\| InSVT == MVT::i32 \|\| InSVT == MVT::i64) && - (OutSVT == MVT::i8 \|\| OutSVT == MVT::i16) && isPowerOf2_32(NumElems) && - NumElems >= 8)) - return SDValue(); + EVT OutSVT = OutVT.getVectorElementType(); + EVT InSVT = InVT.getVectorElementType(); + if (!((InSVT == MVT::i16 \|\| InSVT == MVT::i32 \|\| InSVT == MVT::i64) && + (OutSVT == MVT::i8 \|\| OutSVT == MVT::i16) && isPowerOf2_32(NumElems) && 2 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -EVT OutSVT = OutVT.getVectorElementType(); -EVT…
	EVT InSVT = InVT.getVectorElementType();			EVT InSVT = InVT.getVectorElementType();
	if (!((InSVT == MVT::i16 \|\| InSVT == MVT::i32 \|\| InSVT == MVT::i64) &&			if (!((InSVT == MVT::i16 \|\| InSVT == MVT::i32 \|\| InSVT == MVT::i64) &&
	(OutSVT == MVT::i8 \|\| OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&			(OutSVT == MVT::i8 \|\| OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
	NumElems >= 8))			NumElems >= 8))
	return SDValue();			return SDValue();

	// SSSE3's pshufb results in less instructions in the cases below.			// SSSE3's pshufb results in less instructions in the cases below.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// SSSE3's pshufb results in less instructions in the cases below. -if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64) - return SDValue(); + // SSSE3's pshufb results in less instructions in the cases below. + if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// SSSE3's pshufb results in less instructions in…
	if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64)			if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64)
	return SDValue();			return SDValue();

	SDLoc DL(N);			SDLoc DL(N);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -SDLoc DL(N); -// SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS -// for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to -// truncate 2 x v4i32 to v8i16. -if (Subtarget.hasSSE41() \|\| OutSVT == MVT::i8) - return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG); -if (InSVT == MVT::i32) - return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG); + SDLoc DL(N); + // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS 6 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -SDLoc DL(N); -// SSE2 provides PACKUS for only 2 x…
	// SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS			// SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
	// for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to			// for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
	// truncate 2 x v4i32 to v8i16.			// truncate 2 x v4i32 to v8i16.
	if (Subtarget.hasSSE41() \|\| OutSVT == MVT::i8)			if (Subtarget.hasSSE41() \|\| OutSVT == MVT::i8)
	return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG);			return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG);
	if (InSVT == MVT::i32)			if (InSVT == MVT::i32)
	return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG);			return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG);

	return SDValue();			return SDValue();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -return SDValue(); + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -return SDValue(); + return SDValue(); ```
	}			}

	/// This function transforms vector truncation of 'extended sign-bits' or			/// This function transforms vector truncation of 'extended sign-bits' or
	/// 'extended zero-bits' values.			/// 'extended zero-bits' values.
	/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations.			/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations.
	static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,			static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
	SelectionDAG &DAG,			SelectionDAG &DAG,
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - SelectionDAG &DAG, - const X86Subtarget &Subtarget) { -// Requires SSE2. -if (!Subtarget.hasSSE2()) - return SDValue(); - -if (!N->getValueType(0).isVector() \|\| !N->getValueType(0).isSimple()) - return SDValue(); - -SDValue In = N->getOperand(0); 10 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` - …
	const X86Subtarget &Subtarget) {			const X86Subtarget &Subtarget) {
	// Requires SSE2.			// Requires SSE2.
	if (!Subtarget.hasSSE2())			if (!Subtarget.hasSSE2())
	return SDValue();			return SDValue();

	if (!N->getValueType(0).isVector() \|\| !N->getValueType(0).isSimple())			if (!N->getValueType(0).isVector() \|\| !N->getValueType(0).isSimple())
	return SDValue();			return SDValue();

	SDValue In = N->getOperand(0);			SDValue In = N->getOperand(0);
	if (!In.getValueType().isSimple())			if (!In.getValueType().isSimple())
	return SDValue();			return SDValue();

	MVT VT = N->getValueType(0).getSimpleVT();			MVT VT = N->getValueType(0).getSimpleVT();
	MVT SVT = VT.getScalarType();			MVT SVT = VT.getScalarType();

	MVT InVT = In.getValueType().getSimpleVT();			MVT InVT = In.getValueType().getSimpleVT();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -MVT InVT = In.getValueType().getSimpleVT(); -MVT InSVT = InVT.getScalarType(); + if (!N->getValueType(0).isVector() \|\| !N->getValueType(0).isSimple()) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -MVT InVT = In.getValueType().getSimpleVT(); -MVT…
	MVT InSVT = InVT.getScalarType();			MVT InSVT = InVT.getScalarType();

	// Check we have a truncation suited for PACKSS/PACKUS.			// Check we have a truncation suited for PACKSS/PACKUS.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Check we have a truncation suited for PACKSS/PACKUS. -if (!isPowerOf2_32(VT.getVectorNumElements())) - return SDValue(); -if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32) - return SDValue(); -if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64) - return SDValue(); + SDValue In = N->getOperand(0); + if (!In.getValueType().isSimple()) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Check we have a truncation suited for…
	if (!isPowerOf2_32(VT.getVectorNumElements()))			if (!isPowerOf2_32(VT.getVectorNumElements()))
	return SDValue();			return SDValue();
	if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)			if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
	return SDValue();			return SDValue();
	if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)			if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
	return SDValue();			return SDValue();

	// Truncation to sub-128bit vXi32 can be better handled with shuffles.			// Truncation to sub-128bit vXi32 can be better handled with shuffles.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Truncation to sub-128bit vXi32 can be better handled with shuffles. -if (SVT == MVT::i32 && VT.getSizeInBits() < 128) - return SDValue(); + MVT VT = N->getValueType(0).getSimpleVT(); + MVT SVT = VT.getScalarType(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Truncation to sub-128bit vXi32 can be better…
	if (SVT == MVT::i32 && VT.getSizeInBits() < 128)			if (SVT == MVT::i32 && VT.getSizeInBits() < 128)
	return SDValue();			return SDValue();

	// AVX512 has fast truncate, but if the input is already going to be split,			// AVX512 has fast truncate, but if the input is already going to be split,
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// AVX512 has fast truncate, but if the input is already going to be split, -// there's no harm in trying pack. -if (Subtarget.hasAVX512() && - !(!Subtarget.useAVX512Regs() && VT.is256BitVector() && - InVT.is512BitVector())) { - // PACK should still be worth it for 128-bit vectors if the sources were - // originally concatenated from subvectors. - SmallVector<SDValue> ConcatOps; - if (VT.getSizeInBits() > 128 \|\| !collectConcatOps(In.getNode(), ConcatOps)) - return SDValue(); 3 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -// AVX512 has fast truncate, but if the input is…
	// there's no harm in trying pack.			// there's no harm in trying pack.
	if (Subtarget.hasAVX512() &&			if (Subtarget.hasAVX512() &&
	!(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&			!(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
	InVT.is512BitVector())) {			InVT.is512BitVector())) {
	// PACK should still be worth it for 128-bit vectors if the sources were			// PACK should still be worth it for 128-bit vectors if the sources were
	// originally concatenated from subvectors.			// originally concatenated from subvectors.
	SmallVector<SDValue> ConcatOps;			SmallVector<SDValue> ConcatOps;
	if (VT.getSizeInBits() > 128 \|\| !collectConcatOps(In.getNode(), ConcatOps))			if (VT.getSizeInBits() > 128 \|\| !collectConcatOps(In.getNode(), ConcatOps))
	return SDValue();			return SDValue();
	}			}

	unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);			unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16); -unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8; + // Check we have a truncation suited for PACKSS/PACKUS. + if (!isPowerOf2_32(VT.getVectorNumElements())) + return SDValue(); + if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32) + return SDValue(); + if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -unsigned NumPackedSignBits = std::min<unsigned>(SVT.
	unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;			unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;

	// Use PACKUS if the input has zero-bits that extend all the way to the			// Use PACKUS if the input has zero-bits that extend all the way to the
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Use PACKUS if the input has zero-bits that extend all the way to the -// packed/truncated value. e.g. masks, zext_in_reg, etc. -KnownBits Known = DAG.computeKnownBits(In); -unsigned NumLeadingZeroBits = Known.countMinLeadingZeros(); -if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits)) - return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget); + // Truncation to sub-128bit vXi32 can be better handled with shuffles. + if (SVT == MVT::i32 && VT.getSizeInBits() < 128) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Use PACKUS if the input has zero-bits that…
	// packed/truncated value. e.g. masks, zext_in_reg, etc.			// packed/truncated value. e.g. masks, zext_in_reg, etc.
	KnownBits Known = DAG.computeKnownBits(In);			KnownBits Known = DAG.computeKnownBits(In);
	unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();			unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();
	if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits))			if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits))
	return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);			return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);

	// Use PACKSS if the input has sign-bits that extend all the way to the			// Use PACKSS if the input has sign-bits that extend all the way to the
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Use PACKSS if the input has sign-bits that extend all the way to the -// packed/truncated value. e.g. Comparison result, sext_in_reg, etc. -unsigned NumSignBits = DAG.ComputeNumSignBits(In); + // AVX512 has fast truncate, but if the input is already going to be split, + // there's no harm in trying pack. + if (Subtarget.hasAVX512() && + !(!Subtarget.useAVX512Regs() && VT.is256BitVector() && + InVT.is512BitVector())) { + // PACK should still be worth it for 128-bit vectors if the sources were + // originally concatenated from subvectors. 4 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Use PACKSS if the input has sign-bits that…
	// packed/truncated value. e.g. Comparison result, sext_in_reg, etc.			// packed/truncated value. e.g. Comparison result, sext_in_reg, etc.
	unsigned NumSignBits = DAG.ComputeNumSignBits(In);			unsigned NumSignBits = DAG.ComputeNumSignBits(In);

	// Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with
	// a sign splat. ComputeNumSignBits struggles to see through BITCASTs later
	// on and combines/simplifications can't then use it.
	if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
	return SDValue();

	unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits;
	if (NumSignBits > MinSignBits)
	return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);

	// If we have a srl that only generates signbits that we will discard in
	// the truncation then we can use PACKSS by converting the srl to a sra.
	// SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
	if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
	if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
	In, APInt::getAllOnes(VT.getVectorNumElements()))) {
	if (*ShAmt == MinSignBits) {
	SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
	return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
	Subtarget);
	}
	}

				// Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Don't use PACKSS for vXi64 -> vXi32 truncations unless we're dealing with -// a sign splat. ComputeNumSignBits struggles to see through BITCASTs later -// on and combines/simplifications can't then use it. -if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits()) - return SDValue(); + unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16); + unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8; Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Don't use PACKSS for vXi64 -> vXi32 truncations…
				// a sign splat. ComputeNumSignBits struggles to see through BITCASTs later
				// on and combines/simplifications can't then use it.
				if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
	return SDValue();			return SDValue();

				unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits;
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits; -if (NumSignBits > MinSignBits) - return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget); - -// If we have a srl that only generates signbits that we will discard in -// the truncation then we can use PACKSS by converting the srl to a sra. -// SimplifyDemandedBits often relaxes sra to srl so we need to reverse it. -if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode())) - if (const APInt ShAmt = DAG.getValidShiftAmountConstant( - In, APInt::getAllOnes(VT.getVectorNumElements()))) { 36 diff lines are omitted. See full path. Lint: Pre-merge checks:* clang-format: please reformat the code ``` -unsigned MinSignBits = InSVT.getSizeInBits()…
				if (NumSignBits > MinSignBits)
				return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);

				// If we have a srl that only generates signbits that we will discard in
				// the truncation then we can use PACKSS by converting the srl to a sra.
				// SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
				if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
				if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
				In, APInt::getAllOnes(VT.getVectorNumElements()))) {
				if (*ShAmt == MinSignBits) {
				SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
				return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
				Subtarget);
				}
				}
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - } Lint: Pre-merge checks: clang-format: please reformat the code ``` - } ```

				return SDValue();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -return SDValue(); + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -return SDValue(); + return SDValue(); ```
	}			}

	// Try to form a MULHU or MULHS node by looking for			// Try to form a MULHU or MULHS node by looking for
	// (trunc (srl (mul ext, ext), 16))			// (trunc (srl (mul ext, ext), 16))
	// TODO: This is X86 specific because we want to be able to handle wide types			// TODO: This is X86 specific because we want to be able to handle wide types
	// before type legalization. But we can only do it if the vector will be			// before type legalization. But we can only do it if the vector will be
	// legalized via widening/splitting. Type legalization can't handle promotion			// legalized via widening/splitting. Type legalization can't handle promotion
	// of a MULHU/MULHS. There isn't a way to convey this to the generic DAG			// of a MULHU/MULHS. There isn't a way to convey this to the generic DAG
	// combiner.			// combiner.
	static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,			static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
	SelectionDAG &DAG, const X86Subtarget &Subtarget) {			SelectionDAG &DAG, const X86Subtarget &Subtarget) {
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - SelectionDAG &DAG, const X86Subtarget &Subtarget) { -// First instruction should be a right shift of a multiply. -if (Src.getOpcode() != ISD::SRL \|\| - Src.getOperand(0).getOpcode() != ISD::MUL) - return SDValue(); + SelectionDAG &DAG, const X86Subtarget &Subtarget) { + // First instruction should be a right shift of a multiply. + if (Src.getOpcode() != ISD::SRL \|\| Src.getOperand(0).getOpcode() != ISD::MUL) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` - SelectionDAG &DAG, const…
	// First instruction should be a right shift of a multiply.			// First instruction should be a right shift of a multiply.
	if (Src.getOpcode() != ISD::SRL \|\|			if (Src.getOpcode() != ISD::SRL \|\|
	Src.getOperand(0).getOpcode() != ISD::MUL)			Src.getOperand(0).getOpcode() != ISD::MUL)
	return SDValue();			return SDValue();

	if (!Subtarget.hasSSE2())			if (!Subtarget.hasSSE2())
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -if (!Subtarget.hasSSE2()) - return SDValue(); + if (!Subtarget.hasSSE2()) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -if (!Subtarget.hasSSE2()) - return SDValue(); +…
	return SDValue();			return SDValue();

	// Only handle vXi16 types that are at least 128-bits unless they will be			// Only handle vXi16 types that are at least 128-bits unless they will be
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Only handle vXi16 types that are at least 128-bits unless they will be -// widened. -if (!VT.isVector() \|\| VT.getVectorElementType() != MVT::i16) - return SDValue(); + // Only handle vXi16 types that are at least 128-bits unless they will be + // widened. + if (!VT.isVector() \|\| VT.getVectorElementType() != MVT::i16) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Only handle vXi16 types that are at least 128…
	// widened.			// widened.
	if (!VT.isVector() \|\| VT.getVectorElementType() != MVT::i16)			if (!VT.isVector() \|\| VT.getVectorElementType() != MVT::i16)
	return SDValue();			return SDValue();

	// Input type should be at least vXi32.			// Input type should be at least vXi32.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Input type should be at least vXi32. -EVT InVT = Src.getValueType(); -if (InVT.getVectorElementType().getSizeInBits() < 32) - return SDValue(); + // Input type should be at least vXi32. + EVT InVT = Src.getValueType(); + if (InVT.getVectorElementType().getSizeInBits() < 32) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Input type should be at least vXi32. -EVT InVT =…
	EVT InVT = Src.getValueType();			EVT InVT = Src.getValueType();
	if (InVT.getVectorElementType().getSizeInBits() < 32)			if (InVT.getVectorElementType().getSizeInBits() < 32)
	return SDValue();			return SDValue();

	// Need a shift by 16.			// Need a shift by 16.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Need a shift by 16. -APInt ShiftAmt; -if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) \|\| - ShiftAmt != 16) - return SDValue(); + // Need a shift by 16. + APInt ShiftAmt; + if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) \|\| + ShiftAmt != 16) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Need a shift by 16. -APInt ShiftAmt; -if (!ISD…
	APInt ShiftAmt;			APInt ShiftAmt;
	if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) \|\|			if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) \|\|
	ShiftAmt != 16)			ShiftAmt != 16)
	return SDValue();			return SDValue();

	SDValue LHS = Src.getOperand(0).getOperand(0);			SDValue LHS = Src.getOperand(0).getOperand(0);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -SDValue LHS = Src.getOperand(0).getOperand(0); -SDValue RHS = Src.getOperand(0).getOperand(1); + SDValue LHS = Src.getOperand(0).getOperand(0); + SDValue RHS = Src.getOperand(0).getOperand(1); Lint: Pre-merge checks: clang-format: please reformat the code ``` -SDValue LHS = Src.getOperand(0).getOperand(0)…
	SDValue RHS = Src.getOperand(0).getOperand(1);			SDValue RHS = Src.getOperand(0).getOperand(1);

	unsigned ExtOpc = LHS.getOpcode();			unsigned ExtOpc = LHS.getOpcode();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -unsigned ExtOpc = LHS.getOpcode(); -if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) \|\| - RHS.getOpcode() != ExtOpc) - return SDValue(); + unsigned ExtOpc = LHS.getOpcode(); + if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) \|\| + RHS.getOpcode() != ExtOpc) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -unsigned ExtOpc = LHS.getOpcode(); -if ((ExtOpc !=…
	if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) \|\|			if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) \|\|
	RHS.getOpcode() != ExtOpc)			RHS.getOpcode() != ExtOpc)
	return SDValue();			return SDValue();

	// Peek through the extends.			// Peek through the extends.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Peek through the extends. -LHS = LHS.getOperand(0); -RHS = RHS.getOperand(0); + // Peek through the extends. + LHS = LHS.getOperand(0); + RHS = RHS.getOperand(0); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Peek through the extends. -LHS = LHS.getOperand…
	LHS = LHS.getOperand(0);			LHS = LHS.getOperand(0);
	RHS = RHS.getOperand(0);			RHS = RHS.getOperand(0);

	// Ensure the input types match.			// Ensure the input types match.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Ensure the input types match. -if (LHS.getValueType() != VT \|\| RHS.getValueType() != VT) - return SDValue(); + // Ensure the input types match. + if (LHS.getValueType() != VT \|\| RHS.getValueType() != VT) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Ensure the input types match. -if (LHS.
	if (LHS.getValueType() != VT \|\| RHS.getValueType() != VT)			if (LHS.getValueType() != VT \|\| RHS.getValueType() != VT)
	return SDValue();			return SDValue();

	unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;			unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU; -return DAG.getNode(Opc, DL, VT, LHS, RHS); + unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU; + return DAG.getNode(Opc, DL, VT, LHS, RHS); Lint: Pre-merge checks: clang-format: please reformat the code ``` -unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD…
	return DAG.getNode(Opc, DL, VT, LHS, RHS);			return DAG.getNode(Opc, DL, VT, LHS, RHS);
	}			}

	// Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes			// Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes
	// from one vector with signed bytes from another vector, adds together			// from one vector with signed bytes from another vector, adds together
	// adjacent pairs of 16-bit products, and saturates the result before			// adjacent pairs of 16-bit products, and saturates the result before
	// truncating to 16-bits.			// truncating to 16-bits.
	//			//
	// Which looks something like this:			// Which looks something like this:
	// (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))),			// (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))),
	// (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B))))))))			// (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B))))))))
	static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,			static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
	const X86Subtarget &Subtarget,			const X86Subtarget &Subtarget,
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - const X86Subtarget &Subtarget, - const SDLoc &DL) { -if (!VT.isVector() \|\| !Subtarget.hasSSSE3()) - return SDValue(); + const X86Subtarget &Subtarget, const SDLoc &DL) { + if (!VT.isVector() \|\| !Subtarget.hasSSSE3()) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` - const X86Subtarget…
	const SDLoc &DL) {			const SDLoc &DL) {
	if (!VT.isVector() \|\| !Subtarget.hasSSSE3())			if (!VT.isVector() \|\| !Subtarget.hasSSSE3())
	return SDValue();			return SDValue();

	unsigned NumElems = VT.getVectorNumElements();
	EVT ScalarVT = VT.getVectorElementType();
	if (ScalarVT != MVT::i16 \|\| NumElems < 8 \|\| !isPowerOf2_32(NumElems))
	return SDValue();

	SDValue SSatVal = detectSSatPattern(In, VT);
	if (!SSatVal \|\| SSatVal.getOpcode() != ISD::ADD)
	return SDValue();

	// Ok this is a signed saturation of an ADD. See if this ADD is adding pairs
	// of multiplies from even/odd elements.
	SDValue N0 = SSatVal.getOperand(0);
	SDValue N1 = SSatVal.getOperand(1);

	if (N0.getOpcode() != ISD::MUL \|\| N1.getOpcode() != ISD::MUL)
	return SDValue();

	SDValue N00 = N0.getOperand(0);
	SDValue N01 = N0.getOperand(1);
	SDValue N10 = N1.getOperand(0);
	SDValue N11 = N1.getOperand(1);

	// TODO: Handle constant vectors and use knownbits/computenumsignbits?			unsigned NumElems = VT.getVectorNumElements();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -unsigned NumElems = VT.getVectorNumElements(); -EVT ScalarVT = VT.getVectorElementType(); -if (ScalarVT != MVT::i16 \|\| NumElems < 8 \|\| !isPowerOf2_32(NumElems)) - return SDValue(); + unsigned NumElems = VT.getVectorNumElements(); + EVT ScalarVT = VT.getVectorElementType(); + if (ScalarVT != MVT::i16 \|\| NumElems < 8 \|\| !isPowerOf2_32(NumElems)) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -unsigned NumElems = VT.getVectorNumElements(); -EVT…
	// Canonicalize zero_extend to LHS.			EVT ScalarVT = VT.getVectorElementType();
	if (N01.getOpcode() == ISD::ZERO_EXTEND)			if (ScalarVT != MVT::i16 \|\| NumElems < 8 \|\| !isPowerOf2_32(NumElems))
	std::swap(N00, N01);			return SDValue();
	if (N11.getOpcode() == ISD::ZERO_EXTEND)
	std::swap(N10, N11);

	// Ensure we have a zero_extend and a sign_extend.			SDValue SSatVal = detectSSatPattern(In, VT);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -SDValue SSatVal = detectSSatPattern(In, VT); -if (!SSatVal \|\| SSatVal.getOpcode() != ISD::ADD) - return SDValue(); + SDValue SSatVal = detectSSatPattern(In, VT); + if (!SSatVal \|\| SSatVal.getOpcode() != ISD::ADD) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -SDValue SSatVal = detectSSatPattern(In, VT); -if (!
	if (N00.getOpcode() != ISD::ZERO_EXTEND \|\|			if (!SSatVal \|\| SSatVal.getOpcode() != ISD::ADD)
	N01.getOpcode() != ISD::SIGN_EXTEND \|\|			return SDValue();
	N10.getOpcode() != ISD::ZERO_EXTEND \|\|
	N11.getOpcode() != ISD::SIGN_EXTEND)
	return SDValue();

	// Peek through the extends.			// Ok this is a signed saturation of an ADD. See if this ADD is adding pairs
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Ok this is a signed saturation of an ADD. See if this ADD is adding pairs -// of multiplies from even/odd elements. -SDValue N0 = SSatVal.getOperand(0); -SDValue N1 = SSatVal.getOperand(1); + // Ok this is a signed saturation of an ADD. See if this ADD is adding pairs + // of multiplies from even/odd elements. + SDValue N0 = SSatVal.getOperand(0); + SDValue N1 = SSatVal.getOperand(1); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Ok this is a signed saturation of an ADD. See if…
	N00 = N00.getOperand(0);			// of multiplies from even/odd elements.
	N01 = N01.getOperand(0);			SDValue N0 = SSatVal.getOperand(0);
	N10 = N10.getOperand(0);			SDValue N1 = SSatVal.getOperand(1);
	N11 = N11.getOperand(0);

	// Ensure the extend is from vXi8.			if (N0.getOpcode() != ISD::MUL \|\| N1.getOpcode() != ISD::MUL)
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -if (N0.getOpcode() != ISD::MUL \|\| N1.getOpcode() != ISD::MUL) - return SDValue(); + if (N0.getOpcode() != ISD::MUL \|\| N1.getOpcode() != ISD::MUL) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -if (N0.getOpcode() != ISD::MUL \|\| N1.getOpcode() !=…
	if (N00.getValueType().getVectorElementType() != MVT::i8 \|\|			return SDValue();
	N01.getValueType().getVectorElementType() != MVT::i8 \|\|
	N10.getValueType().getVectorElementType() != MVT::i8 \|\|
	N11.getValueType().getVectorElementType() != MVT::i8)
	return SDValue();

	// All inputs should be build_vectors.			SDValue N00 = N0.getOperand(0);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -SDValue N00 = N0.getOperand(0); -SDValue N01 = N0.getOperand(1); -SDValue N10 = N1.getOperand(0); -SDValue N11 = N1.getOperand(1); - -// TODO: Handle constant vectors and use knownbits/computenumsignbits? -// Canonicalize zero_extend to LHS. -if (N01.getOpcode() == ISD::ZERO_EXTEND) - std::swap(N00, N01); -if (N11.getOpcode() == ISD::ZERO_EXTEND) 12 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -SDValue N00 = N0.getOperand(0); -SDValue N01 = N0.
	if (N00.getOpcode() != ISD::BUILD_VECTOR \|\|			SDValue N01 = N0.getOperand(1);
	N01.getOpcode() != ISD::BUILD_VECTOR \|\|			SDValue N10 = N1.getOperand(0);
	N10.getOpcode() != ISD::BUILD_VECTOR \|\|			SDValue N11 = N1.getOperand(1);
	N11.getOpcode() != ISD::BUILD_VECTOR)
	return SDValue();			// TODO: Handle constant vectors and use knownbits/computenumsignbits?
				// Canonicalize zero_extend to LHS.
				if (N01.getOpcode() == ISD::ZERO_EXTEND)
				std::swap(N00, N01);
				if (N11.getOpcode() == ISD::ZERO_EXTEND)
				std::swap(N10, N11);

				// Ensure we have a zero_extend and a sign_extend.
				if (N00.getOpcode() != ISD::ZERO_EXTEND \|\|
				N01.getOpcode() != ISD::SIGN_EXTEND \|\|
				N10.getOpcode() != ISD::ZERO_EXTEND \|\|
				N11.getOpcode() != ISD::SIGN_EXTEND)
				return SDValue();

	// N00/N10 are zero extended. N01/N11 are sign extended.			// Peek through the extends.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Peek through the extends. -N00 = N00.getOperand(0); -N01 = N01.getOperand(0); -N10 = N10.getOperand(0); -N11 = N11.getOperand(0); - -// Ensure the extend is from vXi8. -if (N00.getValueType().getVectorElementType() != MVT::i8 \|\| - N01.getValueType().getVectorElementType() != MVT::i8 \|\| - N10.getValueType().getVectorElementType() != MVT::i8 \|\| 8 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Peek through the extends. -N00 = N00.getOperand…
				N00 = N00.getOperand(0);
				N01 = N01.getOperand(0);
				N10 = N10.getOperand(0);
				N11 = N11.getOperand(0);

				// Ensure the extend is from vXi8.
				if (N00.getValueType().getVectorElementType() != MVT::i8 \|\|
				N01.getValueType().getVectorElementType() != MVT::i8 \|\|
				N10.getValueType().getVectorElementType() != MVT::i8 \|\|
				N11.getValueType().getVectorElementType() != MVT::i8)
				return SDValue();

	// For each element, we need to ensure we have an odd element from one vector			// All inputs should be build_vectors.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// All inputs should be build_vectors. -if (N00.getOpcode() != ISD::BUILD_VECTOR \|\| - N01.getOpcode() != ISD::BUILD_VECTOR \|\| - N10.getOpcode() != ISD::BUILD_VECTOR \|\| - N11.getOpcode() != ISD::BUILD_VECTOR) - return SDValue(); + // Ensure we have a zero_extend and a sign_extend. + if (N00.getOpcode() != ISD::ZERO_EXTEND \|\| + N01.getOpcode() != ISD::SIGN_EXTEND \|\| + N10.getOpcode() != ISD::ZERO_EXTEND \|\| 2 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -// All inputs should be build_vectors. -if (N00.
	// multiplied by the odd element of another vector and the even element from			if (N00.getOpcode() != ISD::BUILD_VECTOR \|\|
	// one of the same vectors being multiplied by the even element from the			N01.getOpcode() != ISD::BUILD_VECTOR \|\|
	// other vector. So we need to make sure for each element i, this operator			N10.getOpcode() != ISD::BUILD_VECTOR \|\|
	// is being performed:			N11.getOpcode() != ISD::BUILD_VECTOR)
	// A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1]			return SDValue();
	SDValue ZExtIn, SExtIn;
	for (unsigned i = 0; i != NumElems; ++i) {
	SDValue N00Elt = N00.getOperand(i);
	SDValue N01Elt = N01.getOperand(i);
	SDValue N10Elt = N10.getOperand(i);
	SDValue N11Elt = N11.getOperand(i);
	// TODO: Be more tolerant to undefs.
	if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
	N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
	N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
	N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
	return SDValue();
	auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
	auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
	auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
	auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
	if (!ConstN00Elt \|\| !ConstN01Elt \|\| !ConstN10Elt \|\| !ConstN11Elt)
	return SDValue();
	unsigned IdxN00 = ConstN00Elt->getZExtValue();
	unsigned IdxN01 = ConstN01Elt->getZExtValue();
	unsigned IdxN10 = ConstN10Elt->getZExtValue();
	unsigned IdxN11 = ConstN11Elt->getZExtValue();
	// Add is commutative so indices can be reordered.
	if (IdxN00 > IdxN10) {
	std::swap(IdxN00, IdxN10);
	std::swap(IdxN01, IdxN11);
	}
	// N0 indices be the even element. N1 indices must be the next odd element.
	if (IdxN00 != 2 * i \|\| IdxN10 != 2 * i + 1 \|\|
	IdxN01 != 2 * i \|\| IdxN11 != 2 * i + 1)
	return SDValue();
	SDValue N00In = N00Elt.getOperand(0);
	SDValue N01In = N01Elt.getOperand(0);
	SDValue N10In = N10Elt.getOperand(0);
	SDValue N11In = N11Elt.getOperand(0);
	// First time we find an input capture it.
	if (!ZExtIn) {
	ZExtIn = N00In;
	SExtIn = N01In;
	}
	if (ZExtIn != N00In \|\| SExtIn != N01In \|\|
	ZExtIn != N10In \|\| SExtIn != N11In)
	return SDValue();
	}

	auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,			// N00/N10 are zero extended. N01/N11 are sign extended.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// N00/N10 are zero extended. N01/N11 are sign extended. - -// For each element, we need to ensure we have an odd element from one vector -// multiplied by the odd element of another vector and the even element from -// one of the same vectors being multiplied by the even element from the -// other vector. So we need to make sure for each element i, this operator -// is being performed: -// A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1] -SDValue ZExtIn, SExtIn; -for (unsigned i = 0; i != NumElems; ++i) { 143 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -// N00/N10 are zero extended. N01/N11 are sign…
	ArrayRef<SDValue> Ops) {
	// Shrink by adding truncate nodes and let DAGCombine fold with the			// For each element, we need to ensure we have an odd element from one vector
	// sources.			// multiplied by the odd element of another vector and the even element from
	EVT InVT = Ops[0].getValueType();			// one of the same vectors being multiplied by the even element from the
	assert(InVT.getScalarType() == MVT::i8 &&			// other vector. So we need to make sure for each element i, this operator
	"Unexpected scalar element type");			// is being performed:
	assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");			// A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1]
	EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,			SDValue ZExtIn, SExtIn;
	InVT.getVectorNumElements() / 2);			for (unsigned i = 0; i != NumElems; ++i) {
				Lint: Pre-merge checks Inline Actions clang-tidy: warning: invalid case style for variable 'i' [readability-identifier-naming] not useful Lint: Pre-merge checks: clang-tidy: warning: invalid case style for variable 'i' [readability-identifier-naming]…
	return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);			SDValue N00Elt = N00.getOperand(i);
	};			SDValue N01Elt = N01.getOperand(i);
	return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },			SDValue N10Elt = N10.getOperand(i);
	PMADDBuilder);			SDValue N11Elt = N11.getOperand(i);
				// TODO: Be more tolerant to undefs.
				if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
				N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
				N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT \|\|
				N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
				return SDValue();
				auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
				auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
				auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
				auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
				if (!ConstN00Elt \|\| !ConstN01Elt \|\| !ConstN10Elt \|\| !ConstN11Elt)
				return SDValue();
				unsigned IdxN00 = ConstN00Elt->getZExtValue();
				unsigned IdxN01 = ConstN01Elt->getZExtValue();
				unsigned IdxN10 = ConstN10Elt->getZExtValue();
				unsigned IdxN11 = ConstN11Elt->getZExtValue();
				// Add is commutative so indices can be reordered.
				if (IdxN00 > IdxN10) {
				std::swap(IdxN00, IdxN10);
				std::swap(IdxN01, IdxN11);
				}
				// N0 indices be the even element. N1 indices must be the next odd element.
				if (IdxN00 != 2 * i \|\| IdxN10 != 2 * i + 1 \|\|
				IdxN01 != 2 * i \|\| IdxN11 != 2 * i + 1)
				return SDValue();
				SDValue N00In = N00Elt.getOperand(0);
				SDValue N01In = N01Elt.getOperand(0);
				SDValue N10In = N10Elt.getOperand(0);
				SDValue N11In = N11Elt.getOperand(0);
				// First time we find an input capture it.
				if (!ZExtIn) {
				ZExtIn = N00In;
				SExtIn = N01In;
				}
				if (ZExtIn != N00In \|\| SExtIn != N01In \|\|
				ZExtIn != N10In \|\| SExtIn != N11In)
				return SDValue();
				}

				auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
				ArrayRef<SDValue> Ops) {
				// Shrink by adding truncate nodes and let DAGCombine fold with the
				// sources.
				EVT InVT = Ops[0].getValueType();
				assert(InVT.getScalarType() == MVT::i8 &&
				"Unexpected scalar element type");
				assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
				EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
				InVT.getVectorNumElements() / 2);
				return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);
				};
				return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },
				PMADDBuilder);
	}			}

	static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,			static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
	const X86Subtarget &Subtarget) {			const X86Subtarget &Subtarget) {
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - const X86Subtarget &Subtarget) { -EVT VT = N->getValueType(0); -SDValue Src = N->getOperand(0); -SDLoc DL(N); + const X86Subtarget &Subtarget) { + EVT VT = N->getValueType(0); + SDValue Src = N->getOperand(0); + SDLoc DL(N); Lint: Pre-merge checks: clang-format: please reformat the code ``` - const X86Subtarget…
	EVT VT = N->getValueType(0);			EVT VT = N->getValueType(0);
	SDValue Src = N->getOperand(0);			SDValue Src = N->getOperand(0);
	SDLoc DL(N);			SDLoc DL(N);

	// Attempt to pre-truncate inputs to arithmetic ops instead.			// Attempt to pre-truncate inputs to arithmetic ops instead.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Attempt to pre-truncate inputs to arithmetic ops instead. -if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL)) - return V; + // Attempt to pre-truncate inputs to arithmetic ops instead. + if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL)) + return V; Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Attempt to pre-truncate inputs to arithmetic ops…
	if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))			if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
	return V;			return V;

	// Try to detect AVG pattern first.			// Try to detect AVG pattern first.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Try to detect AVG pattern first. -if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) - return Avg; + // Try to detect AVG pattern first. + if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) + return Avg; Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Try to detect AVG pattern first. -if (SDValue…
	if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))			if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
	return Avg;			return Avg;

	// Try to detect PMADD			// Try to detect PMADD
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Try to detect PMADD -if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL)) - return PMAdd; + // Try to detect PMADD + if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL)) + return PMAdd; Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Try to detect PMADD -if (SDValue PMAdd =…
	if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))			if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))
	return PMAdd;			return PMAdd;

	// Try to combine truncation with signed/unsigned saturation.			// Try to combine truncation with signed/unsigned saturation.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Try to combine truncation with signed/unsigned saturation. -if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget)) - return Val; + // Try to combine truncation with signed/unsigned saturation. + if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget)) + return Val; Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Try to combine truncation with signed/unsigned…
	if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))			if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
	return Val;			return Val;

	// Try to combine PMULHUW/PMULHW for vXi16.			// Try to combine PMULHUW/PMULHW for vXi16.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Try to combine PMULHUW/PMULHW for vXi16. -if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget)) - return V; + // Try to combine PMULHUW/PMULHW for vXi16. + if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget)) + return V; Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Try to combine PMULHUW/PMULHW for vXi16. -if…
	if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget))			if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget))
	return V;			return V;

	// The bitcast source is a direct mmx result.			// The bitcast source is a direct mmx result.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// The bitcast source is a direct mmx result. -// Detect bitcasts between i32 to x86mmx -if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { - SDValue BCSrc = Src.getOperand(0); - if (BCSrc.getValueType() == MVT::x86mmx) - return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc); -} + // The bitcast source is a direct mmx result. + // Detect bitcasts between i32 to x86mmx + if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { 4 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -// The bitcast source is a direct mmx result. -//…
	// Detect bitcasts between i32 to x86mmx			// Detect bitcasts between i32 to x86mmx
	if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {			if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
	SDValue BCSrc = Src.getOperand(0);			SDValue BCSrc = Src.getOperand(0);
	if (BCSrc.getValueType() == MVT::x86mmx)			if (BCSrc.getValueType() == MVT::x86mmx)
	return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);			return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
	}			}

	// Try to truncate extended sign/zero bits with PACKSS/PACKUS.			// Try to truncate extended sign/zero bits with PACKSS/PACKUS.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Try to truncate extended sign/zero bits with PACKSS/PACKUS. -if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget)) - return V; + // Try to truncate extended sign/zero bits with PACKSS/PACKUS. + if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget)) + return V; Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Try to truncate extended sign/zero bits with…
	if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))			if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
	return V;			return V;

	return combineVectorTruncation(N, DAG, Subtarget);			return combineVectorTruncation(N, DAG, Subtarget);
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -return combineVectorTruncation(N, DAG, Subtarget); + return combineVectorTruncation(N, DAG, Subtarget); Lint: Pre-merge checks: clang-format: please reformat the code ``` -return combineVectorTruncation(N, DAG, Subtarget)…
	}			}

	static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,			static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,
	TargetLowering::DAGCombinerInfo &DCI) {			TargetLowering::DAGCombinerInfo &DCI) {
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - TargetLowering::DAGCombinerInfo &DCI) { -EVT VT = N->getValueType(0); -SDValue In = N->getOperand(0); -SDLoc DL(N); + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + SDValue In = N->getOperand(0); + SDLoc DL(N); Lint: Pre-merge checks: clang-format: please reformat the code ``` - TargetLowering…
	EVT VT = N->getValueType(0);			EVT VT = N->getValueType(0);
	SDValue In = N->getOperand(0);			SDValue In = N->getOperand(0);
	SDLoc DL(N);			SDLoc DL(N);

	if (auto SSatVal = detectSSatPattern(In, VT))			if (auto SSatVal = detectSSatPattern(In, VT))
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -if (auto SSatVal = detectSSatPattern(In, VT)) - return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); -if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) - return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); + if (auto SSatVal = detectSSatPattern(In, VT)) + return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); + if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) + return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); Lint: Pre-merge checks: clang-format: please reformat the code ``` -if (auto SSatVal = detectSSatPattern(In, VT))…
	return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);			return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
	if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))			if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))
	return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);			return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);

	const TargetLowering &TLI = DAG.getTargetLoweringInfo();			const TargetLowering &TLI = DAG.getTargetLoweringInfo();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -const TargetLowering &TLI = DAG.getTargetLoweringInfo(); -APInt DemandedMask(APInt::getAllOnes(VT.getScalarSizeInBits())); -if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) - return SDValue(N, 0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + APInt DemandedMask(APInt::getAllOnes(VT.getScalarSizeInBits())); + if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) + return SDValue(N, 0); Lint: Pre-merge checks: clang-format: please reformat the code ``` -const TargetLowering &TLI = DAG.
	APInt DemandedMask(APInt::getAllOnes(VT.getScalarSizeInBits()));			APInt DemandedMask(APInt::getAllOnes(VT.getScalarSizeInBits()));
	if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))			if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
	return SDValue(N, 0);			return SDValue(N, 0);

	return SDValue();			return SDValue();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -return SDValue(); + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -return SDValue(); + return SDValue(); ```
	}			}

	/// Returns the negated value if the node \p N flips sign of FP value.			/// Returns the negated value if the node \p N flips sign of FP value.
	///			///
	/// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000)			/// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000)
	/// or FSUB(0, x)			/// or FSUB(0, x)
	/// AVX512F does not have FXOR, so FNEG is lowered as			/// AVX512F does not have FXOR, so FNEG is lowered as
	/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).			/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
	/// In this case we go through all bitcasts.			/// In this case we go through all bitcasts.
	/// This also recognizes splat of a negated value and returns the splat of that			/// This also recognizes splat of a negated value and returns the splat of that
	/// value.			/// value.
	static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {			static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {
	if (N->getOpcode() == ISD::FNEG)			if (N->getOpcode() == ISD::FNEG)
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -if (N->getOpcode() == ISD::FNEG) - return N->getOperand(0); + if (N->getOpcode() == ISD::FNEG) + return N->getOperand(0); Lint: Pre-merge checks: clang-format: please reformat the code ``` -if (N->getOpcode() == ISD::FNEG) - return N…
	return N->getOperand(0);			return N->getOperand(0);

	// Don't recurse exponentially.			// Don't recurse exponentially.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Don't recurse exponentially. -if (Depth > SelectionDAG::MaxRecursionDepth) - return SDValue(); + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Don't recurse exponentially. -if (Depth >…
	if (Depth > SelectionDAG::MaxRecursionDepth)			if (Depth > SelectionDAG::MaxRecursionDepth)
	return SDValue();			return SDValue();

	unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();			unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits(); + unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -unsigned ScalarSize = N->getValueType(0).

	SDValue Op = peekThroughBitcasts(SDValue(N, 0));			SDValue Op = peekThroughBitcasts(SDValue(N, 0));
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -SDValue Op = peekThroughBitcasts(SDValue(N, 0)); -EVT VT = Op->getValueType(0); + SDValue Op = peekThroughBitcasts(SDValue(N, 0)); + EVT VT = Op->getValueType(0); Lint: Pre-merge checks: clang-format: please reformat the code ``` -SDValue Op = peekThroughBitcasts(SDValue(N, 0))…
	EVT VT = Op->getValueType(0);			EVT VT = Op->getValueType(0);

	// Make sure the element size doesn't change.			// Make sure the element size doesn't change.
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -// Make sure the element size doesn't change. -if (VT.getScalarSizeInBits() != ScalarSize) - return SDValue(); + // Make sure the element size doesn't change. + if (VT.getScalarSizeInBits() != ScalarSize) + return SDValue(); Lint: Pre-merge checks: clang-format: please reformat the code ``` -// Make sure the element size doesn't change. -if…
	if (VT.getScalarSizeInBits() != ScalarSize)			if (VT.getScalarSizeInBits() != ScalarSize)
	return SDValue();			return SDValue();

	unsigned Opc = Op.getOpcode();			unsigned Opc = Op.getOpcode();
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code -unsigned Opc = Op.getOpcode(); -switch (Opc) { -case ISD::VECTOR_SHUFFLE: { - // For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate - // of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here. - if (!Op.getOperand(1).isUndef()) - return SDValue(); - if (SDValue NegOp0 = isFNEG(DAG, Op.getOperand(0).getNode(), Depth + 1)) - if (NegOp0.getValueType() == VT) // FIXME: Can we do better? - return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT), 52 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` -unsigned Opc = Op.getOpcode(); -switch (Opc) {…
	switch (Opc) {			switch (Opc) {
	case ISD::VECTOR_SHUFFLE: {			case ISD::VECTOR_SHUFFLE: {
	// For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate			// For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate
	// of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.			// of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
	if (!Op.getOperand(1).isUndef())			if (!Op.getOperand(1).isUndef())
	return SDValue();			return SDValue();
	if (SDValue NegOp0 = isFNEG(DAG, Op.getOperand(0).getNode(), Depth + 1))			if (SDValue NegOp0 = isFNEG(DAG, Op.getOperand(0).getNode(), Depth + 1))
	if (NegOp0.getValueType() == VT) // FIXME: Can we do better?			if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
	return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT),			return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT),
	cast<ShuffleVectorSDNode>(Op)->getMask());			cast<ShuffleVectorSDNode>(Op)->getMask());
	break;			break;
	}			}
	case ISD::INSERT_VECTOR_ELT: {			case ISD::INSERT_VECTOR_ELT: {
	// Negate of INSERT_VECTOR_ELT(UNDEF, V, INDEX) is INSERT_VECTOR_ELT(UNDEF,			// Negate of INSERT_VECTOR_ELT(UNDEF, V, INDEX) is INSERT_VECTOR_ELT(UNDEF,
	// -V, INDEX).			// -V, INDEX).
	SDValue InsVector = Op.getOperand(0);			SDValue InsVector = Op.getOperand(0);
	SDValue InsVal = Op.getOperand(1);			SDValue InsVal = Op.getOperand(1);
	if (!InsVector.isUndef())			if (!InsVector.isUndef())
	return SDValue();			return SDValue();
	if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1))			if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1))
	if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME			if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
	return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,			return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
	NegInsVal, Op.getOperand(2));			NegInsVal, Op.getOperand(2));
	break;			break;
	}			}
	case ISD::FSUB:			case ISD::FSUB:
	case ISD::XOR:			case ISD::XOR:
	case X86ISD::FXOR: {			case X86ISD::FXOR: {
	SDValue Op1 = Op.getOperand(1);			SDValue Op1 = Op.getOperand(1);
	SDValue Op0 = Op.getOperand(0);			SDValue Op0 = Op.getOperand(0);

	// For XOR and FXOR, we want to check if constant			// For XOR and FXOR, we want to check if constant
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - // For XOR and FXOR, we want to check if constant - // bits of Op1 are sign bit masks. For FSUB, we - // have to check if constant bits of Op0 are sign - // bit masks and hence we swap the operands. - if (Opc == ISD::FSUB) - std::swap(Op0, Op1); + // For XOR and FXOR, we want to check if constant + // bits of Op1 are sign bit masks. For FSUB, we + // have to check if constant bits of Op0 are sign + // bit masks and hence we swap the operands. 2 diff lines are omitted. See full path. Lint: Pre-merge checks: clang-format: please reformat the code ``` - // For XOR and FXOR, we want to check if constant…
	// bits of Op1 are sign bit masks. For FSUB, we			// bits of Op1 are sign bit masks. For FSUB, we
	// have to check if constant bits of Op0 are sign			// have to check if constant bits of Op0 are sign
	// bit masks and hence we swap the operands.			// bit masks and hence we swap the operands.
	if (Opc == ISD::FSUB)			if (Opc == ISD::FSUB)
	std::swap(Op0, Op1);			std::swap(Op0, Op1);

	APInt UndefElts;			APInt UndefElts;
				Lint: Pre-merge checks Inline Actions clang-format: please reformat the code - APInt UndefElts; - SmallVector<APInt, 16> EltBits; - // Extract constant bits and see if they are all - // sign bit masks. Ignore the undef elements. - if (getTargetConstantBitsFromNode(Op1, ScalarSize, UndefElts, EltBits, - /* AllowWholeUndefs / - true, - / AllowPartialUndefs / false)) { + APInt UndefElts; + SmallVector<APInt, 16> EltBits; 6 diff lines are omitted. See full path. Lint: Pre-merge checks:* clang-format: please reformat the code ``` - APInt UndefElts; - SmallVector<APInt, 16>…
	SmallVector<APInt, 16> EltBits;			SmallVector<APInt, 16> EltBits;
	// Extract constant bits and see if they are all			// Extract constant bits and see if they are all
	// sign bit masks. Ignore the undef elements.			// sign bit masks. Ignore the undef elements.
	if (getTargetConstantBitsFromNode(Op1, ScalarSize, UndefElts, EltBits,			if (getTargetConstantBitsFromNode(Op1, ScalarSize, UndefElts, EltBits,
	/* AllowWholeUndefs */ true,			/* AllowWholeUndefs */
	/* AllowPartialUndefs */ false)) {			true,
				/* AllowPartialUndefs */ false)) {
	for (unsigned I = 0, E = EltBits.size(); I < E; I++)			for (unsigned I = 0, E = EltBits.size(); I < E; I++)
	if (!UndefElts[I] && !EltBits[I].isSignMask())			if (!UndefElts[I] && !EltBits[I].isSignMask())
	return SDValue();			return SDValue();

	return peekThroughBitcasts(Op0);			return peekThroughBitcasts(Op0);
	}			}
	}			}
	}			}
	▲ Show 20 Lines • Show All 5,478 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 | FileCheck %s

				pengfeiUnsubmitted Not Done Reply Inline Actions How about `CHECK,NO-SZ` pengfei: How about `CHECK,NO-SZ`
				; FADD(acc, FMA(a, b, 0.0)) can be combined to FMA(a, b, acc) if the fast-math flags are set.
				pengfeiUnsubmitted Not Done Reply Inline Actions How about `CHECK,HAS-SZ` pengfei: How about `CHECK,HAS-SZ`
				; 512-bit case: a `fast` vfcmadd.cph.512 call with a zeroinitializer
				; accumulator feeds a `fast` fadd with %acc. The expected output is a lone
				; vfcmaddcph with no separate add.
				define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
				; CHECK-LABEL: test1:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <32 x half> %a to <16 x float>
				%1 = bitcast <32 x half> %b to <16 x float>
				%2 = tail call fast <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> zeroinitializer, <16 x float> %0, <16 x float> %1, i16 -1, i32 4)
				%3 = bitcast <16 x float> %2 to <32 x half>
				%add.i = fadd fast <32 x half> %3, %acc
				ret <32 x half> %add.i
				}

				; 512-bit case: a `fast` vfmadd.cph.512 call with a zeroinitializer
				; accumulator feeds a `fast` fadd with %acc. The expected output is a lone
				; vfmaddcph with no separate add.
				define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
				; CHECK-LABEL: test2:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <32 x half> %a to <16 x float>
				%1 = bitcast <32 x half> %b to <16 x float>
				%2 = tail call fast <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> zeroinitializer, <16 x float> %0, <16 x float> %1, i16 -1, i32 4)
				%3 = bitcast <16 x float> %2 to <32 x half>
				%add.i = fadd fast <32 x half> %3, %acc
				ret <32 x half> %add.i
				}

				; 256-bit case: a `fast` vfcmadd.cph.256 call with a zeroinitializer
				; accumulator feeds a `fast` fadd with %acc. The expected output is a lone
				; vfcmaddcph with no separate add.
				define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
				; CHECK-LABEL: test3:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <16 x half> %a to <8 x float>
				%1 = bitcast <16 x half> %b to <8 x float>
				%2 = tail call fast <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> zeroinitializer, <8 x float> %0, <8 x float> %1, i8 -1)
				%3 = bitcast <8 x float> %2 to <16 x half>
				%add.i = fadd fast <16 x half> %3, %acc
				ret <16 x half> %add.i
				}

				; 256-bit case: a `fast` vfmadd.cph.256 call with a zeroinitializer
				; accumulator feeds a `fast` fadd with %acc. The expected output is a lone
				; vfmaddcph with no separate add.
				define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
				; CHECK-LABEL: test4:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <16 x half> %a to <8 x float>
				%1 = bitcast <16 x half> %b to <8 x float>
				%2 = tail call fast <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> zeroinitializer, <8 x float> %0, <8 x float> %1, i8 -1)
				%3 = bitcast <8 x float> %2 to <16 x half>
				%add.i = fadd fast <16 x half> %3, %acc
				ret <16 x half> %add.i
				}

				; 128-bit case: a `fast` vfcmadd.cph.128 call with a zeroinitializer
				; accumulator feeds a `fast` fadd with %acc. The expected output is a lone
				; vfcmaddcph with no separate add.
				define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
				; CHECK-LABEL: test5:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <8 x half> %a to <4 x float>
				%1 = bitcast <8 x half> %b to <4 x float>
				%2 = tail call fast <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> zeroinitializer, <4 x float> %0, <4 x float> %1, i8 -1)
				%3 = bitcast <4 x float> %2 to <8 x half>
				%add.i = fadd fast <8 x half> %3, %acc
				ret <8 x half> %add.i
				}

				; 128-bit case: a `fast` vfmadd.cph.128 call with a zeroinitializer
				; accumulator feeds a `fast` fadd with %acc. The expected output is a lone
				; vfmaddcph with no separate add.
				define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
				; CHECK-LABEL: test6:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <8 x half> %a to <4 x float>
				%1 = bitcast <8 x half> %b to <4 x float>
				%2 = tail call fast <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> zeroinitializer, <4 x float> %0, <4 x float> %1, i8 -1)
				%3 = bitcast <4 x float> %2 to <8 x half>
				%add.i = fadd fast <8 x half> %3, %acc
				ret <8 x half> %add.i
				}

				; FADD(acc, FMA(a, b, 0.0)) shouldn't be combined to FMA(a, b, acc) if the fast-math flags are not set.
				; 512-bit case with no fast-math flags on either the vfcmadd.cph.512 call or
				; the fadd. The expected output keeps the zeroed accumulator (vxorps), the
				; vfcmaddcph into it, and a separate vaddph with %acc.
				define dso_local <32 x half> @test7(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
				; CHECK-LABEL: test7:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
				; CHECK-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm3
				; CHECK-NEXT: vaddph %zmm0, %zmm3, %zmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <32 x half> %a to <16 x float>
				%1 = bitcast <32 x half> %b to <16 x float>
				%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> zeroinitializer, <16 x float> %0, <16 x float> %1, i16 -1, i32 4)
				%3 = bitcast <16 x float> %2 to <32 x half>
				%add.i = fadd <32 x half> %3, %acc
				ret <32 x half> %add.i
				}

				; 512-bit case with no fast-math flags on either the vfmadd.cph.512 call or
				; the fadd. The expected output keeps the zeroed accumulator (vxorps), the
				; vfmaddcph into it, and a separate vaddph with %acc.
				define dso_local <32 x half> @test8(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
				; CHECK-LABEL: test8:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
				; CHECK-NEXT: vfmaddcph %zmm2, %zmm1, %zmm3
				; CHECK-NEXT: vaddph %zmm0, %zmm3, %zmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <32 x half> %a to <16 x float>
				%1 = bitcast <32 x half> %b to <16 x float>
				%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> zeroinitializer, <16 x float> %0, <16 x float> %1, i16 -1, i32 4)
				%3 = bitcast <16 x float> %2 to <32 x half>
				%add.i = fadd <32 x half> %3, %acc
				ret <32 x half> %add.i
				}

				; 256-bit case with no fast-math flags on either the vfcmadd.cph.256 call or
				; the fadd. The expected output keeps the zeroed accumulator (vxorps), the
				; vfcmaddcph into it, and a separate vaddph with %acc.
				define dso_local <16 x half> @test9(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
				; CHECK-LABEL: test9:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
				; CHECK-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm3
				; CHECK-NEXT: vaddph %ymm0, %ymm3, %ymm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <16 x half> %a to <8 x float>
				%1 = bitcast <16 x half> %b to <8 x float>
				%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> zeroinitializer, <8 x float> %0, <8 x float> %1, i8 -1)
				%3 = bitcast <8 x float> %2 to <16 x half>
				%add.i = fadd <16 x half> %3, %acc
				ret <16 x half> %add.i
				}

				; 256-bit case with no fast-math flags on either the vfmadd.cph.256 call or
				; the fadd. The expected output keeps the zeroed accumulator (vxorps), the
				; vfmaddcph into it, and a separate vaddph with %acc.
				define dso_local <16 x half> @test10(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
				; CHECK-LABEL: test10:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
				; CHECK-NEXT: vfmaddcph %ymm2, %ymm1, %ymm3
				; CHECK-NEXT: vaddph %ymm0, %ymm3, %ymm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <16 x half> %a to <8 x float>
				%1 = bitcast <16 x half> %b to <8 x float>
				%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> zeroinitializer, <8 x float> %0, <8 x float> %1, i8 -1)
				%3 = bitcast <8 x float> %2 to <16 x half>
				%add.i = fadd <16 x half> %3, %acc
				ret <16 x half> %add.i
				}

				; 128-bit case with no fast-math flags on either the vfcmadd.cph.128 call or
				; the fadd. The expected output keeps the zeroed accumulator (vxorps), the
				; vfcmaddcph into it, and a separate vaddph with %acc.
				define dso_local <8 x half> @test11(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
				; CHECK-LABEL: test11:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
				; CHECK-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm3
				; CHECK-NEXT: vaddph %xmm0, %xmm3, %xmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <8 x half> %a to <4 x float>
				%1 = bitcast <8 x half> %b to <4 x float>
				%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> zeroinitializer, <4 x float> %0, <4 x float> %1, i8 -1)
				%3 = bitcast <4 x float> %2 to <8 x half>
				%add.i = fadd <8 x half> %3, %acc
				ret <8 x half> %add.i
				}

				; 128-bit case with no fast-math flags on either the vfmadd.cph.128 call or
				; the fadd. The expected output keeps the zeroed accumulator (vxorps), the
				; vfmaddcph into it, and a separate vaddph with %acc.
				define dso_local <8 x half> @test12(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
				; CHECK-LABEL: test12:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
				; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm3
				; CHECK-NEXT: vaddph %xmm0, %xmm3, %xmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <8 x half> %a to <4 x float>
				%1 = bitcast <8 x half> %b to <4 x float>
				%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> zeroinitializer, <4 x float> %0, <4 x float> %1, i8 -1)
				%3 = bitcast <4 x float> %2 to <8 x half>
				%add.i = fadd <8 x half> %3, %acc
				ret <8 x half> %add.i
				}

				; FADD(acc, FMA(a, b, -0.0)) can be combined to FMA(a, b, acc) even if the fast-math flag of FMA is not set.
				; The following testcases cannot be generated by the front-end.
				; 512-bit case: the vfcmadd.cph.512 call carries no fast-math flags and its
				; accumulator is a splat of float 0xB790000000000000; only the fadd is
				; `fast`. The expected output is still a lone vfcmaddcph.
				; NOTE(review): that float constant presumably has bit pattern 0x80008000,
				; i.e. a pair of half -0.0 values -- confirm.
				define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
				; CHECK-LABEL: test13:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <32 x half> %a to <16 x float>
				%1 = bitcast <32 x half> %b to <16 x float>
				%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, <16 x float> %0, <16 x float> %1, i16 -1, i32 4)
				LiuChen3AuthorUnsubmitted Done Reply Inline Actions Should we do this combine standalone? LiuChen3: Should we do this combine standalone?
				%3 = bitcast <16 x float> %2 to <32 x half>
				%add.i = fadd fast <32 x half> %3, %acc
				ret <32 x half> %add.i
				}

				; 512-bit case: the vfmadd.cph.512 call carries no fast-math flags and its
				; accumulator is a splat of float 0xB790000000000000; only the fadd is
				; `fast`. The expected output is still a lone vfmaddcph.
				; NOTE(review): that float constant presumably has bit pattern 0x80008000,
				; i.e. a pair of half -0.0 values -- confirm.
				define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
				; CHECK-LABEL: test14:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <32 x half> %a to <16 x float>
				%1 = bitcast <32 x half> %b to <16 x float>
				%2 = tail call <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, <16 x float> %0, <16 x float> %1, i16 -1, i32 4)
				%3 = bitcast <16 x float> %2 to <32 x half>
				%add.i = fadd fast <32 x half> %3, %acc
				ret <32 x half> %add.i
				}

				; 256-bit case: the vfcmadd.cph.256 call carries no fast-math flags and its
				; accumulator is a splat of float 0xB790000000000000; only the fadd is
				; `fast`. The expected output is still a lone vfcmaddcph.
				; NOTE(review): that float constant presumably has bit pattern 0x80008000,
				; i.e. a pair of half -0.0 values -- confirm.
				define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
				; CHECK-LABEL: test15:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <16 x half> %a to <8 x float>
				%1 = bitcast <16 x half> %b to <8 x float>
				%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, <8 x float> %0, <8 x float> %1, i8 -1)
				%3 = bitcast <8 x float> %2 to <16 x half>
				%add.i = fadd fast <16 x half> %3, %acc
				ret <16 x half> %add.i
				}

				; 256-bit case: the vfmadd.cph.256 call carries no fast-math flags and its
				; accumulator is a splat of float 0xB790000000000000; only the fadd is
				; `fast`. The expected output is still a lone vfmaddcph.
				; NOTE(review): that float constant presumably has bit pattern 0x80008000,
				; i.e. a pair of half -0.0 values -- confirm.
				define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
				; CHECK-LABEL: test16:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <16 x half> %a to <8 x float>
				%1 = bitcast <16 x half> %b to <8 x float>
				%2 = tail call <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, <8 x float> %0, <8 x float> %1, i8 -1)
				%3 = bitcast <8 x float> %2 to <16 x half>
				%add.i = fadd fast <16 x half> %3, %acc
				ret <16 x half> %add.i
				}

				; 128-bit case: the vfcmadd.cph.128 call carries no fast-math flags and its
				; accumulator is a splat of float 0xB790000000000000; only the fadd is
				; `fast`. The expected output is still a lone vfcmaddcph.
				; NOTE(review): that float constant presumably has bit pattern 0x80008000,
				; i.e. a pair of half -0.0 values -- confirm.
				define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
				; CHECK-LABEL: test17:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <8 x half> %a to <4 x float>
				%1 = bitcast <8 x half> %b to <4 x float>
				%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, <4 x float> %0, <4 x float> %1, i8 -1)
				%3 = bitcast <4 x float> %2 to <8 x half>
				%add.i = fadd fast <8 x half> %3, %acc
				ret <8 x half> %add.i
				}

				; 128-bit case: the vfmadd.cph.128 call carries no fast-math flags and its
				; accumulator is a splat of float 0xB790000000000000; only the fadd is
				; `fast`. The expected output is still a lone vfmaddcph.
				; NOTE(review): that float constant presumably has bit pattern 0x80008000,
				; i.e. a pair of half -0.0 values -- confirm.
				define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
				; CHECK-LABEL: test18:
				; CHECK: # %bb.0: # %entry
				; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
				; CHECK-NEXT: retq
				entry:
				%0 = bitcast <8 x half> %a to <4 x float>
				%1 = bitcast <8 x half> %b to <4 x float>
				%2 = tail call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> <float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000, float 0xB790000000000000>, <4 x float> %0, <4 x float> %1, i8 -1)
				%3 = bitcast <4 x float> %2 to <8 x half>
				%add.i = fadd fast <8 x half> %3, %acc
				ret <8 x half> %add.i
				}

				declare <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32 immarg)
				declare <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float>, <16 x float>, <16 x float>, i16, i32 immarg)
				declare <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float>, <8 x float>, <8 x float>, i8)
				declare <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float>, <8 x float>, <8 x float>, i8)
				declare <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)
				declare <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)

This is an archive of the discontinued LLVM Phabricator instance.

[X86][FP16] Combine the FADD(A, FMA(B, C, 0)) to FMA(B, C, A)
ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 374158

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86][FP16] Combine the FADD(A, FMA(B, C, 0)) to FMA(B, C, A)ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 374158

llvm/lib/Target/X86/X86ISelLowering.cpp

llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll

[X86][FP16] Combine the FADD(A, FMA(B, C, 0)) to FMA(B, C, A)
ClosedPublic