Diff 307513

llvm/include/llvm/CodeGen/TargetLowering.h

	Show First 20 Lines • Show All 4,271 Lines • ▼ Show 20 Lines
	/// If that's true, then return '0' as the number of RefinementSteps to avoid			/// If that's true, then return '0' as the number of RefinementSteps to avoid
	/// any further refinement of the estimate.			/// any further refinement of the estimate.
	/// An empty SDValue return means no estimate sequence can be created.			/// An empty SDValue return means no estimate sequence can be created.
	virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,			virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
	int Enabled, int &RefinementSteps) const {			int Enabled, int &RefinementSteps) const {
	return SDValue();			return SDValue();
	}			}

				/// Return a target-dependent comparison result if the input operand is
				/// suitable for use with a square root estimate calculation. For example, the
				/// comparison may check if the operand is NAN, INF, zero, normal, etc. The
				spatelUnsubmitted Not Done Reply Inline Actions The description did not read clearly to me. How about: "Return a target-dependent comparison result if the input operand is suitable for use with a square root estimate calculation. For example, the comparison may check if the operand is NAN, INF, zero, normal, etc. The result should be used as the condition operand for a select or branch." If the plan is to generalize this hook for 'ftdiv' as well, then we can make the description less specific. spatel: The description did not read clearly to me. How about: "Return a target-dependent comparison…
				/// result should be used as the condition operand for a select or branch.
				virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
				qiucfUnsubmitted Not Done Reply Inline Actions `testSqrtEstimate`? qiucf: `testSqrtEstimate`?
				steven.zhangAuthorUnsubmitted Done Reply Inline Actions Hmm, I still prefer the getXXX naming, since we already have the similar routines getSqrtEstimate and getRecipEstimate. steven.zhang: Hmm, I still prefer the getXXX naming, since we already have the similar routines getSqrtEstimate and getRecipEstimate.
				const DenormalMode &Mode) const {
				return SDValue();
				}

	//===--------------------------------------------------------------------===//			//===--------------------------------------------------------------------===//
	// Legalization utility functions			// Legalization utility functions
	//			//

	/// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,			/// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
	/// respectively, each computing an n/2-bit part of the result.			/// respectively, each computing an n/2-bit part of the result.
	/// \param Result A vector that will be filled with the parts of the result			/// \param Result A vector that will be filled with the parts of the result
	/// in little-endian order.			/// in little-endian order.
	▲ Show 20 Lines • Show All 280 Lines • Show Last 20 Lines

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 22,050 Lines • ▼ Show 20 Lines	if (Iterations) {
? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)		? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);		: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

if (!Reciprocal) {		if (!Reciprocal) {
// The estimate is now completely wrong if the input was exactly 0.0 or		// The estimate is now completely wrong if the input was exactly 0.0 or
// possibly a denormal. Force the answer to 0.0 for those cases.		// possibly a denormal. Force the answer to 0.0 for those cases.
SDLoc DL(Op);		SDLoc DL(Op);
EVT CCVT = getSetCCResultType(VT);		EVT CCVT = getSetCCResultType(VT);
ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;		SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
DenormalMode DenormMode = DAG.getDenormalMode(VT);		DenormalMode DenormMode = DAG.getDenormalMode(VT);
		// Try the target specific test first.
		SDValue Test = TLI.getSqrtInputTest(Op, DAG, DenormMode);
		qiucfUnsubmitted Not Done Reply Inline Actions Would it be better to put the logic below into the base `getSqrtInputTest` implementation? qiucf: Would it be better to put the logic below into the base `getSqrtInputTest` implementation?
		steven.zhangAuthorUnsubmitted Done Reply Inline Actions I think both are OK. The advantage of this approach is that the target-specific test lives inside getSqrtInputTest(), which returns SDValue() if the target does not provide one. We are making the default implementation non-overridable, which makes sense, at least for now. steven.zhang: I think both are OK. The advantage of this approach is that the target-specific test lives inside…
		if (!Test) {
		// If no test provided by target, testing it with denormal inputs to
		// avoid wrong estimate.
if (DenormMode.Input == DenormalMode::IEEE) {		if (DenormMode.Input == DenormalMode::IEEE) {
// This is specifically a check for the handling of denormal inputs,		// This is specifically a check for the handling of denormal inputs,
// not the result.		// not the result.

// fabs(X) < SmallestNormal ? 0.0 : Est		// Test = fabs(X) < SmallestNormal
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);		const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);		APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);		SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);		SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);		Test = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);		} else
} else {		// Test = X == 0.0
// X == 0.0 ? 0.0 : Est		Test = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
		steven.zhangAuthorUnsubmitted Done Reply Inline Actions Question here: are we missing propagation of the SDFlags to the SETCC, which might affect how we lower the select_cc? steven.zhang: Question here: are we missing propagation of the SDFlags to the SETCC, which might affect how we lower…
		nemanjaiUnsubmitted Not Done Reply Inline Actions This thread may be relevant here: http://lists.llvm.org/pipermail/llvm-dev/2020-May/141561.html nemanjai: This thread may be relevant here: http://lists.llvm.org/pipermail/llvm-dev/2020-May/141561.html
		spatelUnsubmitted Not Done Reply Inline Actions Yes, I think we are missing propagation of flags on all of the created nodes in this sequence. I don't know if we can induce any test differences from that with current regression tests, but that can be another patch. spatel: Yes, I think we are missing propagation of flags on all of the created nodes in this sequence.
		steven.zhangAuthorUnsubmitted Done Reply Inline Actions PowerPC backend depends on the flags to determine how to lower select_cc between select and cmp+branch. I believe we can see the test difference. And yes, it is another patch. steven.zhang: PowerPC backend depends on the flags to determine how to lower select_cc between select and…
		steven.zhangAuthorUnsubmitted Done Reply Inline Actions > This thread may be relevant here: http://lists.llvm.org/pipermail/llvm-dev/2020-May/141561.html Yeah, this seems to be a big hole in DAGCombine. We don't even have a parameter to pass the flags to getSetCC now, though they could be set later. steven.zhang: > This thread may be relevant here: http://lists.llvm.org/pipermail/llvm-dev/2020-May/141561.
		steven.zhangAuthorUnsubmitted Done Reply Inline Actions Can we add a verifier in the DAG to verify the flags, i.e., to check that no flags are dropped during DAG combining? steven.zhang: Can we add a verifier in the DAG to verify the flags, i.e., to check that no flags are dropped…
		spatelUnsubmitted Not Done Reply Inline Actions I'm not seeing how it would work because the flags are always optional and not necessarily propagated from the operands. But if you see a way to do it, that would be a nice enhancement. spatel: I'm not seeing how it would work because the flags are always optional and not necessarily…
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
}		}
		// Test ? 0.0 : Est
		Est = DAG.getNode(Test.getValueType().isVector() ? ISD::VSELECT
		: ISD::SELECT,
		DL, VT, Test, FPZero, Est);
}		}
}		}
return Est;		return Est;
}		}

return SDValue();		return SDValue();
}		}

▲ Show 20 Lines • Show All 440 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCISelLowering.h

Show First 20 Lines • Show All 83 Lines • ▼ Show 20 Lines	enum NodeType : unsigned {
/// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in		/// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
/// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.		/// VSFRC that is sign-extended from ByteWidth to a 64-byte integer.
VEXTS,		VEXTS,

/// Reciprocal estimate instructions (unary FP ops).		/// Reciprocal estimate instructions (unary FP ops).
FRE,		FRE,
FRSQRTE,		FRSQRTE,

		/// Test instruction for software square root.
		FTSQRT,

/// VPERM - The PPC VPERM Instruction.		/// VPERM - The PPC VPERM Instruction.
///		///
VPERM,		VPERM,

/// XXSPLT - The PPC VSX splat instructions		/// XXSPLT - The PPC VSX splat instructions
///		///
XXSPLT,		XXSPLT,

▲ Show 20 Lines • Show All 1,178 Lines • ▼ Show 20 Lines	private:
/// (2) keeping the result of comparison in GPR has performance benefit.		/// (2) keeping the result of comparison in GPR has performance benefit.
SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;		SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;

SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,		SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps, bool &UseOneConstNR,		int &RefinementSteps, bool &UseOneConstNR,
bool Reciprocal) const override;		bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,		SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;		int &RefinementSteps) const override;
		SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
		const DenormalMode &Mode) const override;
unsigned combineRepeatedFPDivisors() const override;		unsigned combineRepeatedFPDivisors() const override;

SDValue		SDValue
combineElementTruncationToVectorTruncation(SDNode *N,		combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;		DAGCombinerInfo &DCI) const;

/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be		/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is		/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
Show All 39 Lines

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,441 Lines • ▼ Show 20 Lines const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {

case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";

case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";

case PPCISD::FP_TO_UINT_IN_VSR: case PPCISD::FP_TO_UINT_IN_VSR:

return "PPCISD::FP_TO_UINT_IN_VSR,"; return "PPCISD::FP_TO_UINT_IN_VSR,";

case PPCISD::FP_TO_SINT_IN_VSR: case PPCISD::FP_TO_SINT_IN_VSR:

return "PPCISD::FP_TO_SINT_IN_VSR"; return "PPCISD::FP_TO_SINT_IN_VSR";

case PPCISD::FRE: return "PPCISD::FRE"; case PPCISD::FRE: return "PPCISD::FRE";

case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";

case PPCISD::FTSQRT:

return "PPCISD::FTSQRT";

shchenzUnsubmitted

Done

Better to follow legacy formatting or reformat all the sentences here?

shchenz: Better to follow legacy formatting or reformat all the sentences here?

steven.zhangAuthorUnsubmitted

Done

It is formatted by clang-format. Maybe we can commit an NFC patch to format all the statements here.

steven.zhang: It is formatted by clang-format. Maybe we can commit an NFC patch to format all the statements…

case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::STFIWX: return "PPCISD::STFIWX";

case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::VPERM: return "PPCISD::VPERM";

case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT";

case PPCISD::XXSPLTI_SP_TO_DP: case PPCISD::XXSPLTI_SP_TO_DP:

return "PPCISD::XXSPLTI_SP_TO_DP"; return "PPCISD::XXSPLTI_SP_TO_DP";

case PPCISD::XXSPLTI32DX: case PPCISD::XXSPLTI32DX:

return "PPCISD::XXSPLTI32DX"; return "PPCISD::XXSPLTI32DX";

case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; case PPCISD::VECINSERT: return "PPCISD::VECINSERT";

▲ Show 20 Lines • Show All 11,295 Lines • ▼ Show 20 Lines static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {

// the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),

// this is 2^-14. IEEE float has 23 digits and double has 52 digits. // this is 2^-14. IEEE float has 23 digits and double has 52 digits.

int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;

if (VT.getScalarType() == MVT::f64) if (VT.getScalarType() == MVT::f64)

RefinementSteps++; RefinementSteps++;

return RefinementSteps; return RefinementSteps;

} }

SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,

const DenormalMode &Mode) const {

// TODO - add support for v2f64/v4f32

EVT VT = Op.getValueType();

if (VT != MVT::f64)

return SDValue();

shchenzUnsubmitted

Not Done

Move the comments above line 12383?

shchenz: Move the comments above line 12383?

steven.zhangAuthorUnsubmitted

Done

OK, I will do it in a later update of this patch.

steven.zhang: OK, I will do it in a later update of this patch.

SDLoc DL(Op);

// The output register of FTSQRT is CR field.

SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);

shchenzUnsubmitted

Done

Any specific reason for i32 here? I guess here i8 would be enough

shchenz: Any specific reason for i32 here? I guess here i8 would be enough

steven.zhangAuthorUnsubmitted

Done

It is because the type of the CRRC must be i32.

def CRRC : RegisterClass<"PPC", [i32], 32,
  (add CR0, CR1, CR5, CR6,
       CR7, CR2, CR3, CR4)> {
  let AltOrders = [(sub CRRC, CR2, CR3, CR4)];
  let AltOrderSelect = [{
    return MF.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
           MF.getInfo<PPCFunctionInfo>()->isNonVolatileCRDisabled();
  }];
}

steven.zhang: It is because the type of the CRRC must be i32. ``` def CRRC : RegisterClass<"PPC", [i32], 32…

// ftsqrt BF,FRB

// Let e_b be the unbiased exponent of the double-precision

// floating-point operand in register FRB.

// fe_flag is set to 1 if either of the following conditions occurs.

// - The double-precision floating-point operand in register FRB is a zero,

// a NaN, or an infinity, or a negative value.

// - e_b is less than or equal to -970.

// Otherwise fe_flag is set to 0.

qiucfUnsubmitted

Not Done

SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);

- // ftsqrt BF,FRB

- // Let e_b be the unbiased exponent of the double-precision

- // floating-point operand in register FRB.

- // fe_flag is set to 1 if either of the following conditions occurs.

- // - The double-precision floating-point operand in register FRB is a zero,

- // a NaN, or an infinity, or a negative value.

- // - e_b is less than or equal to -970.

- // Otherwise fe_flag is set to 0.

+ // Both VSX and non-VSX versions would set EQ bit in the CR if the number is

+ // not eligible for iteration. (zero/negative/infinity/nan or unbiased exponent

+ // is less than -970)

SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);

qiucf:

steven.zhangAuthorUnsubmitted

Done

Thank you for this. I will update it.

steven.zhang: Thank you for this. I will update it.

// Both VSX and non-VSX versions would set EQ bit in the CR if the number is

shchenzUnsubmitted

Done

The target-independent code checks for denormal input, and ftsqrt reports its denormal-input check in fg_flag, but your comment seems to be about fe_flag. Does this matter?

shchenz: The target-independent code checks for denormal input, and `ftsqrt` reports its denormal-input check in…

steven.zhangAuthorUnsubmitted

Done

The target-independent code does not assume what the check contains; the target is free to perform whatever kind of check it needs. The ISA contains a programming note:

ftdiv and ftsqrt are provided to accelerate software
emulation of divide and square root operations, by
performing the requisite special case checking.
Software needs only a single branch, on FE=1 (in
CR[BF]), to a special case handler. FG and FL may
provide further acceleration opportunities.

So I selected FE for the special-case handling.

steven.zhang: Target independent code didn't assume the content of the check and the target is free to do the…

shchenzUnsubmitted

Not Done

I think there is functionality issue here if we use fe_flag, not fg_flag. From the comments in target independent code:

// The estimate is now completely wrong if the input was exactly 0.0 or
// possibly a denormal. Force the answer to 0.0 for those cases.

The iteration method used to calculate the sqrt would be wrong if the input is denormal.

But in PowerPC's hook implementation, fe_flag will not be set even if the input is denormal. So now for denormal input, we may also use the newton iterated est after testing fe_flag.

shchenz: I think there is functionality issue here if we use `fe_flag`, not `fg_flag`. From the comments…

steven.zhangAuthorUnsubmitted

Done

According to http://web.mit.edu/hyperbook/Patrikalakis-Maekawa-Cho/node47.html, a double-precision floating-point value is denormal if exp < -1022. So ftsqrt must return 1 for it, since fe_flag is set if e_b <= -970. That means we won't have a functionality issue, only a precision issue for values with -1022 <= exp <= -970, which is handled by D80974.

steven.zhang: According to http://web.mit.edu/hyperbook/Patrikalakis-Maekawa-Cho/node47.html, the double…

shchenzUnsubmitted

Not Done

Thanks for your explanation. So if flag fg_flag is 1, fe_flag must be also 1. For the normal input cases where fe_flag is 1, but fg_flag is 0, you handle them in D80974. This makes sense to me.

shchenz: Thanks for your explanation. So if flag fg_flag is 1, fe_flag must be also 1. For the normal…

// not eligible for iteration. (zero/negative/infinity/nan or unbiased

// exponent is less than or equal to -970)

SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);

return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,

FTSQRT, SRIdxVal),

0);

}

SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,

int Enabled, int &RefinementSteps, int Enabled, int &RefinementSteps,

bool &UseOneConstNR, bool &UseOneConstNR,

bool Reciprocal) const { bool Reciprocal) const {

EVT VT = Operand.getValueType(); EVT VT = Operand.getValueType();

if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||

(VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||

(VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||

▲ Show 20 Lines • Show All 4,237 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCInstrFormats.td

Show First 20 Lines • Show All 631 Lines • ▼ Show 20 Lines	class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{9-10} = 0;		let Inst{9-10} = 0;
let Inst{11-15} = FRA;		let Inst{11-15} = FRA;
let Inst{16-20} = FRB;		let Inst{16-20} = FRB;
let Inst{21-30} = xo;		let Inst{21-30} = xo;
let Inst{31} = 0;		let Inst{31} = 0;
}		}

class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,		class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>		InstrItinClass itin, list<dag> pattern>
: XForm_17<opcode, xo, OOL, IOL, asmstr, itin > {		: XForm_17<opcode, xo, OOL, IOL, asmstr, itin > {
let FRA = 0;		let FRA = 0;
		let Pattern = pattern;
		qiucfUnsubmitted Not Done Reply Inline Actions Typo: extra space qiucf: Typo: extra space
		steven.zhangAuthorUnsubmitted Done Reply Inline Actions Good catch. Update it when I commit it. steven.zhang: Good catch. Update it when I commit it.
}		}

class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,		class XForm_18<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>		InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {		: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> FRT;		bits<5> FRT;
bits<5> FRA;		bits<5> FRA;
bits<5> FRB;		bits<5> FRB;
▲ Show 20 Lines • Show All 1,493 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Show First 20 Lines • Show All 68 Lines • ▼ Show 20 Lines
def SDT_PPCvcmp : SDTypeProfile<1, 3, [		def SDT_PPCvcmp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>		SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
]>;		]>;

def SDT_PPCcondbr : SDTypeProfile<0, 3, [		def SDT_PPCcondbr : SDTypeProfile<0, 3, [
SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>		SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
]>;		]>;

		def SDT_PPCFtsqrt : SDTypeProfile<1, 1, [
		SDTCisVT<0, i32>]>;

def SDT_PPClbrx : SDTypeProfile<1, 2, [		def SDT_PPClbrx : SDTypeProfile<1, 2, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>		SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;		]>;
def SDT_PPCstbrx : SDTypeProfile<0, 3, [		def SDT_PPCstbrx : SDTypeProfile<0, 3, [
SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>		SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
]>;		]>;

def SDT_PPCTC_ret : SDTypeProfile<0, 2, [		def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
Show All 34 Lines
]>;		]>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// PowerPC specific DAG Nodes.		// PowerPC specific DAG Nodes.
//		//

def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;		def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;		def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
		def PPCftsqrt : SDNode<"PPCISD::FTSQRT", SDT_PPCFtsqrt,[]>;

def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;		def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;		def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;		def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;		def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;		def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;		def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;		def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
▲ Show 20 Lines • Show All 2,503 Lines • ▼ Show 20 Lines	let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def FCMPOD : XForm_17<63, 32, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),		def FCMPOD : XForm_17<63, 32, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
"fcmpo $crD, $fA, $fB", IIC_FPCompare>;		"fcmpo $crD, $fA, $fB", IIC_FPCompare>;
}		}
}		}

def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),		def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
"ftdiv $crD, $fA, $fB", IIC_FPCompare>;		"ftdiv $crD, $fA, $fB", IIC_FPCompare>;
def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),		def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
"ftsqrt $crD, $fB", IIC_FPCompare>;		"ftsqrt $crD, $fB", IIC_FPCompare,
		[(set i32:$crD, (PPCftsqrt f64:$fB))]>;

let mayRaiseFPException = 1, hasSideEffects = 0 in {		let mayRaiseFPException = 1, hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in		let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),		defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,		"frin", "$frD, $frB", IIC_FPGeneral,
[(set f64:$frD, (any_fround f64:$frB))]>;		[(set f64:$frD, (any_fround f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),		defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,		"frin", "$frD, $frB", IIC_FPGeneral,
▲ Show 20 Lines • Show All 2,617 Lines • Show Last 20 Lines

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Show First 20 Lines • Show All 623 Lines • ▼ Show 20 Lines	def XSRSQRTEDP : XX2Form<60, 74,
[(set f64:$XT, (PPCfrsqrte f64:$XB))]>;		[(set f64:$XT, (PPCfrsqrte f64:$XB))]>;

let mayRaiseFPException = 0 in {		let mayRaiseFPException = 0 in {
def XSTDIVDP : XX3Form_1<60, 61,		def XSTDIVDP : XX3Form_1<60, 61,
(outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),		(outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
"xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>;		"xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
def XSTSQRTDP : XX2Form_1<60, 106,		def XSTSQRTDP : XX2Form_1<60, 106,
(outs crrc:$crD), (ins vsfrc:$XB),		(outs crrc:$crD), (ins vsfrc:$XB),
"xstsqrtdp $crD, $XB", IIC_FPCompare, []>;		"xstsqrtdp $crD, $XB", IIC_FPCompare,
		[(set i32:$crD, (PPCftsqrt f64:$XB))]>;
		shchenzUnsubmitted Done Reply Inline Actions Same as above, i8 should be enough for crD? shchenz: Same as above, i8 should be enough for crD?
def XVTDIVDP : XX3Form_1<60, 125,		def XVTDIVDP : XX3Form_1<60, 125,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),		(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
"xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;		"xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
def XVTDIVSP : XX3Form_1<60, 93,		def XVTDIVSP : XX3Form_1<60, 93,
(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),		(outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
"xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;		"xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;

def XVTSQRTDP : XX2Form_1<60, 234,		def XVTSQRTDP : XX2Form_1<60, 234,
▲ Show 20 Lines • Show All 4,022 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/fma-mutate.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx -disable-ppc-vsx-fma-mutation=false \| FileCheck %s			; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx -disable-ppc-vsx-fma-mutation=false \| FileCheck %s

	declare double @llvm.sqrt.f64(double)			declare double @llvm.sqrt.f64(double)

	; Test several VSX FMA mutation opportunities.			; Test several VSX FMA mutation opportunities.

	; This is reasonable transformation since it eliminates extra register copy.			; This is reasonable transformation since it eliminates extra register copy.
	define double @foo3_fmf(double %a) nounwind {			define double @foo3_fmf(double %a) nounwind {
	; CHECK-LABEL: foo3_fmf:			; CHECK-LABEL: foo3_fmf:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: xsabsdp 0, 1			; CHECK-NEXT: xstsqrtdp 0, 1
	; CHECK-NEXT: addis 3, 2, .LCPI0_2@toc@ha
	; CHECK-NEXT: lfd 2, .LCPI0_2@toc@l(3)
	; CHECK-NEXT: xscmpudp 0, 0, 2
	; CHECK-NEXT: xxlxor 0, 0, 0			; CHECK-NEXT: xxlxor 0, 0, 0
	; CHECK-NEXT: blt 0, .LBB0_2			; CHECK-NEXT: bc 12, 2, .LBB0_2
	; CHECK-NEXT: # %bb.1:			; CHECK-NEXT: # %bb.1:
	; CHECK-NEXT: xsrsqrtedp 0, 1			; CHECK-NEXT: xsrsqrtedp 0, 1
	; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha			; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
	; CHECK-NEXT: lfs 3, .LCPI0_0@toc@l(3)			; CHECK-NEXT: lfs 3, .LCPI0_0@toc@l(3)
	; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha			; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
	; CHECK-NEXT: lfs 4, .LCPI0_1@toc@l(3)			; CHECK-NEXT: lfs 4, .LCPI0_1@toc@l(3)
	; CHECK-NEXT: xsmuldp 2, 1, 0			; CHECK-NEXT: xsmuldp 2, 1, 0
	; CHECK-NEXT: xsmaddmdp 2, 0, 3			; CHECK-NEXT: xsmaddmdp 2, 0, 3
	Show All 22 Lines

llvm/test/CodeGen/PowerPC/recipest.ll

Show First 20 Lines • Show All 743 Lines • ▼ Show 20 Lines
; CHECK-P9-NEXT: blr		; CHECK-P9-NEXT: blr
%r = fdiv <4 x float> %a, %b		%r = fdiv <4 x float> %a, %b
ret <4 x float> %r		ret <4 x float> %r
}		}

define double @foo3_fmf(double %a) nounwind {		define double @foo3_fmf(double %a) nounwind {
; CHECK-P7-LABEL: foo3_fmf:		; CHECK-P7-LABEL: foo3_fmf:
; CHECK-P7: # %bb.0:		; CHECK-P7: # %bb.0:
; CHECK-P7-NEXT: fabs 0, 1		; CHECK-P7-NEXT: ftsqrt 0, 1
; CHECK-P7-NEXT: addis 3, 2, .LCPI20_2@toc@ha		; CHECK-P7-NEXT: bc 12, 2, .LBB20_2
; CHECK-P7-NEXT: lfd 2, .LCPI20_2@toc@l(3)
; CHECK-P7-NEXT: fcmpu 0, 0, 2
; CHECK-P7-NEXT: blt 0, .LBB20_2
; CHECK-P7-NEXT: # %bb.1:		; CHECK-P7-NEXT: # %bb.1:
; CHECK-P7-NEXT: frsqrte 0, 1		; CHECK-P7-NEXT: frsqrte 0, 1
; CHECK-P7-NEXT: addis 3, 2, .LCPI20_0@toc@ha		; CHECK-P7-NEXT: addis 3, 2, .LCPI20_0@toc@ha
; CHECK-P7-NEXT: addis 4, 2, .LCPI20_1@toc@ha		; CHECK-P7-NEXT: addis 4, 2, .LCPI20_1@toc@ha
; CHECK-P7-NEXT: lfs 3, .LCPI20_0@toc@l(3)		; CHECK-P7-NEXT: lfs 3, .LCPI20_0@toc@l(3)
; CHECK-P7-NEXT: lfs 4, .LCPI20_1@toc@l(4)		; CHECK-P7-NEXT: lfs 4, .LCPI20_1@toc@l(4)
; CHECK-P7-NEXT: fmul 2, 1, 0		; CHECK-P7-NEXT: fmul 2, 1, 0
; CHECK-P7-NEXT: fmadd 2, 2, 0, 3		; CHECK-P7-NEXT: fmadd 2, 2, 0, 3
; CHECK-P7-NEXT: fmul 0, 0, 4		; CHECK-P7-NEXT: fmul 0, 0, 4
; CHECK-P7-NEXT: fmul 0, 0, 2		; CHECK-P7-NEXT: fmul 0, 0, 2
; CHECK-P7-NEXT: fmul 1, 1, 0		; CHECK-P7-NEXT: fmul 1, 1, 0
; CHECK-P7-NEXT: fmadd 0, 1, 0, 3		; CHECK-P7-NEXT: fmadd 0, 1, 0, 3
; CHECK-P7-NEXT: fmul 1, 1, 4		; CHECK-P7-NEXT: fmul 1, 1, 4
; CHECK-P7-NEXT: fmul 1, 1, 0		; CHECK-P7-NEXT: fmul 1, 1, 0
; CHECK-P7-NEXT: blr		; CHECK-P7-NEXT: blr
; CHECK-P7-NEXT: .LBB20_2:		; CHECK-P7-NEXT: .LBB20_2:
; CHECK-P7-NEXT: addis 3, 2, .LCPI20_3@toc@ha		; CHECK-P7-NEXT: addis 3, 2, .LCPI20_2@toc@ha
; CHECK-P7-NEXT: lfs 1, .LCPI20_3@toc@l(3)		; CHECK-P7-NEXT: lfs 1, .LCPI20_2@toc@l(3)
; CHECK-P7-NEXT: blr		; CHECK-P7-NEXT: blr
;		;
; CHECK-P8-LABEL: foo3_fmf:		; CHECK-P8-LABEL: foo3_fmf:
; CHECK-P8: # %bb.0:		; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: xsabsdp 0, 1		; CHECK-P8-NEXT: xstsqrtdp 0, 1
; CHECK-P8-NEXT: addis 3, 2, .LCPI20_2@toc@ha
; CHECK-P8-NEXT: lfd 2, .LCPI20_2@toc@l(3)
; CHECK-P8-NEXT: xscmpudp 0, 0, 2
; CHECK-P8-NEXT: xxlxor 0, 0, 0		; CHECK-P8-NEXT: xxlxor 0, 0, 0
; CHECK-P8-NEXT: blt 0, .LBB20_2		; CHECK-P8-NEXT: bc 12, 2, .LBB20_2
; CHECK-P8-NEXT: # %bb.1:		; CHECK-P8-NEXT: # %bb.1:
; CHECK-P8-NEXT: xsrsqrtedp 0, 1		; CHECK-P8-NEXT: xsrsqrtedp 0, 1
; CHECK-P8-NEXT: addis 3, 2, .LCPI20_0@toc@ha		; CHECK-P8-NEXT: addis 3, 2, .LCPI20_0@toc@ha
; CHECK-P8-NEXT: lfs 3, .LCPI20_0@toc@l(3)		; CHECK-P8-NEXT: lfs 3, .LCPI20_0@toc@l(3)
; CHECK-P8-NEXT: addis 3, 2, .LCPI20_1@toc@ha		; CHECK-P8-NEXT: addis 3, 2, .LCPI20_1@toc@ha
; CHECK-P8-NEXT: lfs 4, .LCPI20_1@toc@l(3)		; CHECK-P8-NEXT: lfs 4, .LCPI20_1@toc@l(3)
; CHECK-P8-NEXT: fmr 5, 3		; CHECK-P8-NEXT: fmr 5, 3
; CHECK-P8-NEXT: xsmuldp 2, 1, 0		; CHECK-P8-NEXT: xsmuldp 2, 1, 0
; CHECK-P8-NEXT: xsmaddadp 5, 2, 0		; CHECK-P8-NEXT: xsmaddadp 5, 2, 0
; CHECK-P8-NEXT: xsmuldp 0, 0, 4		; CHECK-P8-NEXT: xsmuldp 0, 0, 4
; CHECK-P8-NEXT: xsmuldp 0, 0, 5		; CHECK-P8-NEXT: xsmuldp 0, 0, 5
; CHECK-P8-NEXT: xsmuldp 1, 1, 0		; CHECK-P8-NEXT: xsmuldp 1, 1, 0
; CHECK-P8-NEXT: xsmaddadp 3, 1, 0		; CHECK-P8-NEXT: xsmaddadp 3, 1, 0
; CHECK-P8-NEXT: xsmuldp 0, 1, 4		; CHECK-P8-NEXT: xsmuldp 0, 1, 4
; CHECK-P8-NEXT: xsmuldp 0, 0, 3		; CHECK-P8-NEXT: xsmuldp 0, 0, 3
; CHECK-P8-NEXT: .LBB20_2:		; CHECK-P8-NEXT: .LBB20_2:
; CHECK-P8-NEXT: fmr 1, 0		; CHECK-P8-NEXT: fmr 1, 0
; CHECK-P8-NEXT: blr		; CHECK-P8-NEXT: blr
;		;
; CHECK-P9-LABEL: foo3_fmf:		; CHECK-P9-LABEL: foo3_fmf:
; CHECK-P9: # %bb.0:		; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: addis 3, 2, .LCPI20_2@toc@ha		; CHECK-P9-NEXT: xstsqrtdp 0, 1
; CHECK-P9-NEXT: xsabsdp 0, 1
; CHECK-P9-NEXT: lfd 2, .LCPI20_2@toc@l(3)
; CHECK-P9-NEXT: xscmpudp 0, 0, 2
; CHECK-P9-NEXT: xxlxor 0, 0, 0		; CHECK-P9-NEXT: xxlxor 0, 0, 0
; CHECK-P9-NEXT: blt 0, .LBB20_2		; CHECK-P9-NEXT: bc 12, 2, .LBB20_2
; CHECK-P9-NEXT: # %bb.1:		; CHECK-P9-NEXT: # %bb.1:
; CHECK-P9-NEXT: xsrsqrtedp 0, 1		; CHECK-P9-NEXT: xsrsqrtedp 0, 1
; CHECK-P9-NEXT: addis 3, 2, .LCPI20_0@toc@ha		; CHECK-P9-NEXT: addis 3, 2, .LCPI20_0@toc@ha
; CHECK-P9-NEXT: lfs 3, .LCPI20_0@toc@l(3)		; CHECK-P9-NEXT: lfs 3, .LCPI20_0@toc@l(3)
; CHECK-P9-NEXT: addis 3, 2, .LCPI20_1@toc@ha		; CHECK-P9-NEXT: addis 3, 2, .LCPI20_1@toc@ha
; CHECK-P9-NEXT: xsmuldp 2, 1, 0		; CHECK-P9-NEXT: xsmuldp 2, 1, 0
; CHECK-P9-NEXT: fmr 4, 3		; CHECK-P9-NEXT: fmr 4, 3
; CHECK-P9-NEXT: xsmaddadp 4, 2, 0		; CHECK-P9-NEXT: xsmaddadp 4, 2, 0
▲ Show 20 Lines • Show All 213 Lines • ▼ Show 20 Lines	; CHECK-P9-NEXT: blr
%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)		%r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
ret <4 x float> %r		ret <4 x float> %r
}		}

define <2 x double> @hoo4_fmf(<2 x double> %a) #1 {		define <2 x double> @hoo4_fmf(<2 x double> %a) #1 {
; CHECK-P7-LABEL: hoo4_fmf:		; CHECK-P7-LABEL: hoo4_fmf:
; CHECK-P7: # %bb.0:		; CHECK-P7: # %bb.0:
; CHECK-P7-NEXT: addis 3, 2, .LCPI26_2@toc@ha		; CHECK-P7-NEXT: addis 3, 2, .LCPI26_2@toc@ha
		; CHECK-P7-NEXT: ftsqrt 0, 1
; CHECK-P7-NEXT: fmr 3, 1		; CHECK-P7-NEXT: fmr 3, 1
; CHECK-P7-NEXT: addis 4, 2, .LCPI26_1@toc@ha		; CHECK-P7-NEXT: addis 4, 2, .LCPI26_0@toc@ha
; CHECK-P7-NEXT: lfs 0, .LCPI26_2@toc@l(3)		; CHECK-P7-NEXT: lfs 0, .LCPI26_2@toc@l(3)
; CHECK-P7-NEXT: addis 3, 2, .LCPI26_0@toc@ha		; CHECK-P7-NEXT: addis 3, 2, .LCPI26_1@toc@ha
; CHECK-P7-NEXT: lfs 4, .LCPI26_1@toc@l(4)		; CHECK-P7-NEXT: lfs 5, .LCPI26_0@toc@l(4)
; CHECK-P7-NEXT: lfs 5, .LCPI26_0@toc@l(3)		; CHECK-P7-NEXT: lfs 4, .LCPI26_1@toc@l(3)
; CHECK-P7-NEXT: fcmpu 0, 1, 0
; CHECK-P7-NEXT: fmr 1, 0		; CHECK-P7-NEXT: fmr 1, 0
; CHECK-P7-NEXT: bne 0, .LBB26_3		; CHECK-P7-NEXT: bc 4, 2, .LBB26_3
; CHECK-P7-NEXT: # %bb.1:		; CHECK-P7-NEXT: # %bb.1:
; CHECK-P7-NEXT: fcmpu 0, 2, 0		; CHECK-P7-NEXT: ftsqrt 0, 2
; CHECK-P7-NEXT: bne 0, .LBB26_4		; CHECK-P7-NEXT: bc 4, 2, .LBB26_4
; CHECK-P7-NEXT: .LBB26_2:		; CHECK-P7-NEXT: .LBB26_2:
; CHECK-P7-NEXT: fmr 2, 0		; CHECK-P7-NEXT: fmr 2, 0
; CHECK-P7-NEXT: blr		; CHECK-P7-NEXT: blr
; CHECK-P7-NEXT: .LBB26_3:		; CHECK-P7-NEXT: .LBB26_3:
; CHECK-P7-NEXT: frsqrte 1, 3		; CHECK-P7-NEXT: frsqrte 1, 3
; CHECK-P7-NEXT: fmul 6, 3, 1		; CHECK-P7-NEXT: fmul 6, 3, 1
; CHECK-P7-NEXT: fmadd 6, 6, 1, 5		; CHECK-P7-NEXT: fmadd 6, 6, 1, 5
; CHECK-P7-NEXT: fmul 1, 1, 4		; CHECK-P7-NEXT: fmul 1, 1, 4
; CHECK-P7-NEXT: fmul 1, 1, 6		; CHECK-P7-NEXT: fmul 1, 1, 6
; CHECK-P7-NEXT: fmul 3, 3, 1		; CHECK-P7-NEXT: fmul 3, 3, 1
; CHECK-P7-NEXT: fmadd 1, 3, 1, 5		; CHECK-P7-NEXT: fmadd 1, 3, 1, 5
; CHECK-P7-NEXT: fmul 3, 3, 4		; CHECK-P7-NEXT: fmul 3, 3, 4
; CHECK-P7-NEXT: fmul 1, 3, 1		; CHECK-P7-NEXT: fmul 1, 3, 1
; CHECK-P7-NEXT: fcmpu 0, 2, 0		; CHECK-P7-NEXT: ftsqrt 0, 2
; CHECK-P7-NEXT: beq 0, .LBB26_2		; CHECK-P7-NEXT: bc 12, 2, .LBB26_2
; CHECK-P7-NEXT: .LBB26_4:		; CHECK-P7-NEXT: .LBB26_4:
; CHECK-P7-NEXT: frsqrte 0, 2		; CHECK-P7-NEXT: frsqrte 0, 2
; CHECK-P7-NEXT: fmul 3, 2, 0		; CHECK-P7-NEXT: fmul 3, 2, 0
; CHECK-P7-NEXT: fmadd 3, 3, 0, 5		; CHECK-P7-NEXT: fmadd 3, 3, 0, 5
; CHECK-P7-NEXT: fmul 0, 0, 4		; CHECK-P7-NEXT: fmul 0, 0, 4
; CHECK-P7-NEXT: fmul 0, 0, 3		; CHECK-P7-NEXT: fmul 0, 0, 3
; CHECK-P7-NEXT: fmul 2, 2, 0		; CHECK-P7-NEXT: fmul 2, 2, 0
; CHECK-P7-NEXT: fmadd 0, 2, 0, 5		; CHECK-P7-NEXT: fmadd 0, 2, 0, 5
▲ Show 20 Lines • Show All 144 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombine] Add hook to allow target specific test for sqrt input
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 307513

llvm/include/llvm/CodeGen/TargetLowering.h

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/Target/PowerPC/PPCISelLowering.h

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/lib/Target/PowerPC/PPCInstrFormats.td

llvm/lib/Target/PowerPC/PPCInstrInfo.td

llvm/lib/Target/PowerPC/PPCInstrVSX.td

llvm/test/CodeGen/PowerPC/fma-mutate.ll

llvm/test/CodeGen/PowerPC/recipest.ll

This is an archive of the discontinued LLVM Phabricator instance.

[DAGCombine] Add hook to allow target specific test for sqrt inputClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 307513

llvm/include/llvm/CodeGen/TargetLowering.h

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

llvm/lib/Target/PowerPC/PPCISelLowering.h

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

llvm/lib/Target/PowerPC/PPCInstrFormats.td

llvm/lib/Target/PowerPC/PPCInstrInfo.td

llvm/lib/Target/PowerPC/PPCInstrVSX.td

llvm/test/CodeGen/PowerPC/fma-mutate.ll

llvm/test/CodeGen/PowerPC/recipest.ll

[DAGCombine] Add hook to allow target specific test for sqrt input
ClosedPublic