Diff 32324

lib/Target/X86/X86ISelLowering.h

Show First 20 Lines • Show All 847 Lines • ▼ Show 20 Lines	public:

/// Return true if the specified scalar FP type is computed in an SSE		/// Return true if the specified scalar FP type is computed in an SSE
/// register, not on the X87 floating point stack.		/// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {		bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) \|\| // f64 is when SSE2		return (VT == MVT::f64 && X86ScalarSSEf64) \|\| // f64 is when SSE2
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1		(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}		}

/// Return true if the target uses the MSVC _ftol2 routine for fptoui.
bool isTargetFTOL() const;

/// Decide whether an fptoui producing \p VT should go through the MSVC
/// _ftol2 routine. That is the case only when the target uses _ftol2 at
/// all (see isTargetFTOL) and the integer result type is i64.
bool isIntegerTypeFTOL(EVT VT) const {
const bool TargetUsesFTOL = isTargetFTOL();
return TargetUsesFTOL && VT == MVT::i64;
}

/// \brief Returns true if it is beneficial to convert a load of a constant		/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.		/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,		bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;		Type *Ty) const override;

/// Return true if EXTRACT_SUBVECTOR is cheap for this result type		/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.		/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;		bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;
▲ Show 20 Lines • Show All 261 Lines • Show Last 20 Lines

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 112 Lines • ▼ Show 20 Lines	if (Subtarget->isTargetKnownWindowsMSVC()) {
setLibcallName(RTLIB::SREM_I64, "_allrem");		setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");		setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");		setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);		setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);		setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);		setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);		setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);		setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);

// The _ftol2 runtime function has an unusual calling conv, which
// is modeled by a special pseudo-instruction.
setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
}		}

if (Subtarget->isTargetDarwin()) {		if (Subtarget->isTargetDarwin()) {
// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.		// Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
setUseUnderscoreSetJmp(false);		setUseUnderscoreSetJmp(false);
setUseUnderscoreLongJmp(false);		setUseUnderscoreLongJmp(false);
} else if (Subtarget->isTargetWindowsGNU()) {		} else if (Subtarget->isTargetWindowsGNU()) {
// MS runtime is weird: it exports _setjmp, but longjmp!		// MS runtime is weird: it exports _setjmp, but longjmp!
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines	X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,

// Handle FP_TO_UINT by promoting the destination to a larger signed		// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.		// conversion.
setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);		setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);		setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);		setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);

if (Subtarget->is64Bit()) {		if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);		if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
		// FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
		setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
		setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
		} else {
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);		setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
		setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
		}
} else if (!Subtarget->useSoftFloat()) {		} else if (!Subtarget->useSoftFloat()) {
// Since AVX is a superset of SSE3, only check for SSE here.		// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())		if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.		// Expand FP_TO_UINT into a select.
// FIXME: We would like to use a Custom expander here eventually to do		// FIXME: We would like to use a Custom expander here eventually to do
// the optimal thing for SSE vs. the default expansion in the legalizer.		// the optimal thing for SSE vs. the default expansion in the legalizer.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);		setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
else		else
		// With AVX512 we can use vcvtts[ds]2usi for f32/f64->i32, f80 is custom.
// With SSE3 we can use fisttpll to convert to a signed i64; without		// With SSE3 we can use fisttpll to convert to a signed i64; without
// SSE, we're stuck with a fistpll.		// SSE, we're stuck with a fistpll.
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);		setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
}

if (isTargetFTOL()) {
// Use the _ftol2 runtime function, which has a pseudo-instruction
// to handle its weird calling convention.
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);		setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
}		}

// TODO: when we have SSE, these could be more efficient, by using movd/movq.		// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {		if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);		setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);		setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget->is64Bit()) {		if (Subtarget->is64Bit()) {
▲ Show 20 Lines • Show All 1,055 Lines • ▼ Show 20 Lines	if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) {
setOperationAction(ISD::FSUB, MVT::v8f64, Legal);		setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
setOperationAction(ISD::FMUL, MVT::v8f64, Legal);		setOperationAction(ISD::FMUL, MVT::v8f64, Legal);
setOperationAction(ISD::FDIV, MVT::v8f64, Legal);		setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);		setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);		setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
setOperationAction(ISD::FMA, MVT::v8f64, Legal);		setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);		setOperationAction(ISD::FMA, MVT::v16f32, Legal);

setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);		// FIXME: [US]INT_TO_FP are not legal for f80.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);		setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);		setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
if (Subtarget->is64Bit()) {		if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);		setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);		setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
}		}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);		setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);		setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);		setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);		setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);		setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
▲ Show 20 Lines • Show All 10,888 Lines • ▼ Show 20 Lines	SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
FudgePtr, MachinePointerInfo::getConstantPool(),		FudgePtr, MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, false, 4);		MVT::f32, false, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.		// Extend everything to 80 bits to force it to be done on x87.
SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);		SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,		return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl));		DAG.getIntPtrConstant(0, dl));
}		}

		// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
		// is legal, or has an f16 source (which needs to be promoted to f32),
		// just return an <SDValue(), SDValue()> pair.
		// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
		// to i16, i32 or i64, and we lower it to a legal sequence.
		// If lowered to the final integer result we return a <result, SDValue()> pair.
		// Otherwise we lower it to a sequence ending with a FIST, return a
		// <FIST, StackSlot> pair, and the caller is responsible for loading
		// the final integer result from StackSlot.
std::pair<SDValue,SDValue>		std::pair<SDValue,SDValue>
X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,		X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool IsSigned, bool IsReplace) const {		bool IsSigned, bool IsReplace) const {
SDLoc DL(Op);		SDLoc DL(Op);

EVT DstTy = Op.getValueType();		EVT DstTy = Op.getValueType();
		EVT TheVT = Op.getOperand(0).getValueType();
auto PtrVT = getPointerTy(DAG.getDataLayout());		auto PtrVT = getPointerTy(DAG.getDataLayout());

if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {		if (TheVT == MVT::f16)
		// We need to promote the f16 to f32 before using the lowering
		// in this routine.
		return std::make_pair(SDValue(), SDValue());

		assert((TheVT == MVT::f32 \|\|
		TheVT == MVT::f64 \|\|
		TheVT == MVT::f80) &&
		"Unexpected FP operand type in FP_TO_INTHelper");

		// If using FIST to compute an unsigned i64, we'll need some fixup
		// to handle values above the maximum signed i64. A FIST is always
		// used for the 32-bit subtarget, but also for f80 on a 64-bit target.
		bool UnsignedFixup = !IsSigned &&
		DstTy == MVT::i64 &&
		(!Subtarget->is64Bit() \|\|
		!isScalarFPTypeInSSEReg(TheVT));

		if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) {
		// Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
		// The low 32 bits of the fist result will have the correct uint32 result.
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");		assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;		DstTy = MVT::i64;
}		}

assert(DstTy.getSimpleVT() <= MVT::i64 &&		assert(DstTy.getSimpleVT() <= MVT::i64 &&
DstTy.getSimpleVT() >= MVT::i16 &&		DstTy.getSimpleVT() >= MVT::i16 &&
"Unknown FP_TO_INT to lower!");		"Unknown FP_TO_INT to lower!");

// These are really Legal.		// These are really Legal.
if (DstTy == MVT::i32 &&		if (DstTy == MVT::i32 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))		isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());		return std::make_pair(SDValue(), SDValue());
if (Subtarget->is64Bit() &&		if (Subtarget->is64Bit() &&
DstTy == MVT::i64 &&		DstTy == MVT::i64 &&
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))		isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());		return std::make_pair(SDValue(), SDValue());

// We lower FP->int64 either into FISTP64 followed by a load from a temporary		// We lower FP->int64 into FISTP64 followed by a load from a temporary
// stack slot, or into the FTOL runtime function.		// stack slot.
MachineFunction &MF = DAG.getMachineFunction();		MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;		unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);		int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);		SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);

unsigned Opc;		unsigned Opc;
if (!IsSigned && isIntegerTypeFTOL(DstTy))
Opc = X86ISD::WIN_FTOL;
else
switch (DstTy.getSimpleVT().SimpleTy) {		switch (DstTy.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Invalid FP_TO_SINT to lower!");		default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;		case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;		case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;		case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
}		}

SDValue Chain = DAG.getEntryNode();		SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);		SDValue Value = Op.getOperand(0);
EVT TheVT = Op.getOperand(0).getValueType();		SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.

		if (UnsignedFixup) {
		//
		// Conversion to unsigned i64 is implemented with a select,
		// depending on whether the source value fits in the range
		// of a signed i64. Let Thresh be the FP equivalent of
		// 0x8000000000000000ULL.
		//
		// Adjust i32 = (Value < Thresh) ? 0 : 0x80000000;
		// FistSrc = (Value < Thresh) ? Value : (Value - Thresh);
		// Fist-to-mem64 FistSrc
		// Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent
		// to XOR'ing the high 32 bits with Adjust.
		//
		// Being a power of 2, Thresh is exactly representable in all FP formats.
		// For X87 we'd like to use the smallest FP type for this constant, but
		// for DAG type consistency we have to match the FP operand type.

		APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000));
		APFloat::opStatus Status = APFloat::opOK;
		bool LosesInfo = false;
		if (TheVT == MVT::f64)
		// The rounding mode is irrelevant as the conversion should be exact.
		Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
		&LosesInfo);
		else if (TheVT == MVT::f80)
		Status = Thresh.convert(APFloat::x87DoubleExtended,
		APFloat::rmNearestTiesToEven, &LosesInfo);

		assert(Status == APFloat::opOK && !LosesInfo &&
		"FP conversion should have been exact");

		SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);

		SDValue Cmp = DAG.getSetCC(DL,
		getSetCCResultType(DAG.getDataLayout(),
		*DAG.getContext(), TheVT),
		Value, ThreshVal, ISD::SETLT);
		Adjust = DAG.getSelect(DL, MVT::i32, Cmp,
		DAG.getConstant(0, DL, MVT::i32),
		DAG.getConstant(0x80000000, DL, MVT::i32));
		SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal);
		Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(),
		*DAG.getContext(), TheVT),
		Value, ThreshVal, ISD::SETLT);
		Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub);
		}

// FIXME This causes a redundant load/store if the SSE-class value is already		// FIXME This causes a redundant load/store if the SSE-class value is already
// in memory, such as if it is on the callstack.		// in memory, such as if it is on the callstack.
if (isScalarFPTypeInSSEReg(TheVT)) {		if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");		assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,		Chain = DAG.getStore(Chain, DL, Value, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),		MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);		false, false, 0);
SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);		SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
Show All 9 Lines	if (isScalarFPTypeInSSEReg(TheVT)) {
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);		SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, PtrVT);		StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
}		}

MachineMemOperand *MMO =		MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),		MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);		MachineMemOperand::MOStore, MemSize, MemSize);

if (Opc != X86ISD::WIN_FTOL) {		if (UnsignedFixup) {

		// Insert the FIST, load its result as two i32's,
		// and XOR the high i32 with Adjust.

		SDValue FistOps[] = { Chain, Value, StackSlot };
		SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
		FistOps, DstTy, MMO);

		SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot,
		MachinePointerInfo(),
		false, false, false, 0);
		SDValue HighAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackSlot,
		DAG.getConstant(4, DL, PtrVT));

		SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr,
		MachinePointerInfo(),
		false, false, false, 0);
		High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust);

		if (Subtarget->is64Bit()) {
		// Join High32 and Low32 into a 64-bit result.
		// (High32 << 32) \| Low32
		Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32);
		High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32);
		majnemerUnsubmitted Not Done Reply Inline Actions Could we use `ANY_EXTEND` for `High32`? We will shift out the top 32-bits anyway. majnemer: Could we use `ANY_EXTEND` for `High32`? We will shift out the top 32-bits anyway.
		mbodartAuthorUnsubmitted Not Done Reply Inline Actions Yes, thanks for the suggestion! Fixed in latest upload. mbodart: Yes, thanks for the suggestion! Fixed in latest upload.
		High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32,
		DAG.getConstant(32, DL, MVT::i8));
		SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32);
		return std::make_pair(Result, SDValue());
		}

		SDValue ResultOps[] = { Low32, High32 };

		SDValue pair = IsReplace
		? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps)
		: DAG.getMergeValues(ResultOps, DL);
		return std::make_pair(pair, SDValue());
		} else {
// Build the FP_TO_INT*_IN_MEM		// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };		SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),		SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
Ops, DstTy, MMO);		Ops, DstTy, MMO);
return std::make_pair(FIST, StackSlot);		return std::make_pair(FIST, StackSlot);
} else {
SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
DAG.getVTList(MVT::Other, MVT::Glue),
Chain, Value);
SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
MVT::i32, ftol.getValue(1));
SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
MVT::i32, eax.getValue(2));
SDValue Ops[] = { eax, edx };
SDValue pair = IsReplace
? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops)
: DAG.getMergeValues(Ops, DL);
return std::make_pair(pair, SDValue());
}		}
}		}

static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,		static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {		const X86Subtarget *Subtarget) {
MVT VT = Op->getSimpleValueType(0);		MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);		SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();		MVT InVT = In.getSimpleValueType();
▲ Show 20 Lines • Show All 244 Lines • ▼ Show 20 Lines
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,		SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
assert(!Op.getSimpleValueType().isVector());		assert(!Op.getSimpleValueType().isVector());

std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,		std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ true, /*IsReplace=*/ false);		/*IsSigned=*/ true, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;		SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.		// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (!FIST.getNode()) return Op;		if (!FIST.getNode())
		return Op;

if (StackSlot.getNode())		if (StackSlot.getNode())
// Load the result.		// Load the result.
return DAG.getLoad(Op.getValueType(), SDLoc(Op),		return DAG.getLoad(Op.getValueType(), SDLoc(Op),
FIST, StackSlot, MachinePointerInfo(),		FIST, StackSlot, MachinePointerInfo(),
false, false, false, 0);		false, false, false, 0);

// The node is the result.		// The node is the result.
return FIST;		return FIST;
}		}

SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,		SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
SelectionDAG &DAG) const {		SelectionDAG &DAG) const {
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,		std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ false, /*IsReplace=*/ false);		/*IsSigned=*/ false, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;		SDValue FIST = Vals.first, StackSlot = Vals.second;
assert(FIST.getNode() && "Unexpected failure");		// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
		if (!FIST.getNode())
		majnemerUnsubmitted Not Done Reply Inline Actions Please format the return onto its own line. majnemer: Please format the return onto its own line.
		mbodartAuthorUnsubmitted Not Done Reply Inline Actions Fixed, both here and up above in LowerFP_TO_SINT, where this was copied from. mbodart: Fixed, both here and up above in LowerFP_TO_SINT, where this was copied from.
		return Op;

if (StackSlot.getNode())		if (StackSlot.getNode())
// Load the result.		// Load the result.
return DAG.getLoad(Op.getValueType(), SDLoc(Op),		return DAG.getLoad(Op.getValueType(), SDLoc(Op),
FIST, StackSlot, MachinePointerInfo(),		FIST, StackSlot, MachinePointerInfo(),
false, false, false, 0);		false, false, false, 0);

// The node is the result.		// The node is the result.
▲ Show 20 Lines • Show All 6,008 Lines • ▼ Show 20 Lines	void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::UREM:		case ISD::UREM:
case ISD::SDIVREM:		case ISD::SDIVREM:
case ISD::UDIVREM: {		case ISD::UDIVREM: {
SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);		SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
Results.push_back(V);		Results.push_back(V);
return;		return;
}		}
case ISD::FP_TO_SINT:		case ISD::FP_TO_SINT:
// FP_TO_INT*_IN_MEM is not legal for f16 inputs. Do not convert
// (FP_TO_SINT (load f16)) to FP_TO_INT*.
if (N->getOperand(0).getValueType() == MVT::f16)
break;
// fallthrough
case ISD::FP_TO_UINT: {		case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;		bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;

if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
return;

std::pair<SDValue,SDValue> Vals =		std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);		FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;		SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode()) {		if (FIST.getNode()) {
EVT VT = N->getValueType(0);		EVT VT = N->getValueType(0);
// Return a load from the stack slot.		// Return a load from the stack slot.
if (StackSlot.getNode())		if (StackSlot.getNode())
Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,		Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
▲ Show 20 Lines • Show All 7,547 Lines • ▼ Show 20 Lines	int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
// E.g., on Haswell:		// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.		// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.		// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
if (isLegalAddressingMode(DL, AM, Ty, AS))		if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1		// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.		// as soon as we use a second register.
return AM.Scale != 0;		return AM.Scale != 0;
return -1;		return -1;
}		}
		mkuperUnsubmitted Not Done Reply Inline Actions I'm ok with just deleting the FTOL code immediately, unless anyone objects. (Can be a follow-up patch) mkuper: I'm ok with just deleting the FTOL code immediately, unless anyone objects. (Can be a follow-up…
		majnemerUnsubmitted Not Done Reply Inline Actions I think it makes sense to remove it. majnemer: I think it makes sense to remove it.
		mbodartAuthorUnsubmitted Not Done Reply Inline Actions I'm fine with removing it, but need a little guidance as to the depth of removal. Certainly isIntegerTypeFTOL and isTargetFTOL, and their use in FP_TO_INTHelper, can be removed. But further deletion of the WIN_FTOL instructions in the .td files would prevent any future use of that library routine. How can I be certain that isn't being used, or won't be needed? mbodart: I'm fine with removing it, but need a little guidance as to the depth of removal. Certainly…

/// Report whether the target uses the MSVC _ftol2 routine for fptoui:
/// true only for a subtarget known to be Windows MSVC that is not 64-bit.
bool X86TargetLowering::isTargetFTOL() const {
const bool IsKnownMSVC = Subtarget->isTargetKnownWindowsMSVC();
return IsKnownMSVC && !Subtarget->is64Bit();
}

test/CodeGen/X86/pr17631.ll

Show All 24 Lines	define <8 x float> @foo(<8 x float> %y, i64* %p, double %x) {
%i = fptoui double %x to i64		%i = fptoui double %x to i64
store i64 %i, i64* %p		store i64 %i, i64* %p
%ret = fadd <8 x float> %y, %y		%ret = fadd <8 x float> %y, %y
ret <8 x float> %ret		ret <8 x float> %ret
}		}

; CHECK: foo		; CHECK: foo
; CHECK-NOT: vzeroupper		; CHECK-NOT: vzeroupper
; CHECK: _ftol2		; CHECK: {{cvtt\|fist}}
; CHECK: ret		; CHECK: ret

test/CodeGen/X86/scalar-fp-to-i64.ll

				; Check that scalar FP conversions to signed and unsigned int64 are using
				; reasonable sequences, across platforms and target switches.
				;
				; The signed case is straightforward, and the tests here basically
				; ensure successful compilation (f80 with avx512 was broken at one point).
				;
				; For the unsigned case there are many possible sequences, so to avoid
				; a fragile test we just check for the presence of a few key instructions.
				; AVX512 on Intel64 can use vcvtts[ds]2usi directly for float and double.
				; Otherwise the sequence will involve an FP subtract (fsub, subss or subsd),
				; and a truncating conversion (cvtts[ds]2si, fisttp, or fnstcw+fist). When
				; both a subtract and fnstcw are needed, they can occur in either order.
				;
				; The interesting subtargets are AVX512F (vcvtts[ds]2usi), SSE3 (fisttp),
				; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only).
				;
				; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+avx512f \| FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32
				; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f \| FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_32
				mkuperUnsubmitted Not Done Reply Inline Actions Can you use -mattr here instead of an explicit -mcpu? mkuper: Can you use -mattr here instead of an explicit -mcpu?
				mbodartAuthorUnsubmitted Not Done Reply Inline Actions I see both -mcpu and -mattr being used for skx and avx512 testing in lit tests. -mcpu seemed simpler and more "bullet proof" if you will in that it captures all relevant attributes (sse, sse2, ... avx, skx). But if -mattr is the preferred method, I can certainly change to that. What is the BKM? mbodart: I see both -mcpu and -mattr being used for skx and avx512 testing in lit tests. -mcpu seemed…
				; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+avx512f \| FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64
				; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f \| FileCheck %s --check-prefix=CHECK --check-prefix=AVX512_64
				; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse3 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32
				; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse3 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_32
				; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse3 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64
				; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE3_64
				; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=+sse2 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32
				; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+sse2 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_32
				; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc -mattr=+sse2 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64
				; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE2_64
				; RUN: llc < %s -mtriple=i386-pc-windows-msvc -mattr=-sse \| FileCheck %s --check-prefix=CHECK --check-prefix=X87
				; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=-sse \| FileCheck %s --check-prefix=CHECK --check-prefix=X87

				; CHECK-LABEL: f_to_u64
				; X87-DAG: fsub
				; X87-DAG: fnstcw
				; X87: fist
				; SSE2_32-DAG: {{subss\|fsub}}
				; SSE2_32-DAG: fnstcw
				; SSE2_32: fist
				; SSE2_64: subss
				; SSE2_64: cvttss2si
				; SSE3_32: {{subss\|fsub}}
				; SSE3_32: fistt
				; SSE3_64: subss
				; SSE3_64: cvttss2si
				; AVX512_32: {{subss\|fsub}}
				; AVX512_32: fistt
				; AVX512_64: vcvttss2usi
				; CHECK: ret
				define i64 @f_to_u64(float %a) nounwind {
				%r = fptoui float %a to i64
				ret i64 %r
				}

				; CHECK-LABEL: f_to_s64
				; X87: fnstcw
				; X87: fist
				; SSE2_32: fnstcw
				; SSE2_32: fist
				; SSE2_64: cvttss2si
				; SSE3_32: fistt
				; SSE3_64: cvttss2si
				; AVX512_32: fistt
				; AVX512_64: vcvttss2si
				; CHECK: ret
				define i64 @f_to_s64(float %a) nounwind {
				%r = fptosi float %a to i64
				ret i64 %r
				}

				; CHECK-LABEL: d_to_u64
				; X87-DAG: fsub
				; X87-DAG: fnstcw
				; X87: fist
				; SSE2_32-DAG: {{subsd\|fsub}}
				; SSE2_32-DAG: fnstcw
				; SSE2_32: fist
				; SSE2_64: subsd
				; SSE2_64: cvttsd2si
				; SSE3_32: {{subsd\|fsub}}
				; SSE3_32: fistt
				; SSE3_64: subsd
				; SSE3_64: cvttsd2si
				; AVX512_32: {{subsd\|fsub}}
				; AVX512_32: fistt
				; AVX512_64: vcvttsd2usi
				; CHECK: ret
				define i64 @d_to_u64(double %a) nounwind {
				%r = fptoui double %a to i64
				ret i64 %r
				}

				; fptosi double -> i64. The x87-only and 32-bit SSE2 runs are expected to
				; emit fnstcw followed by fist; 32-bit runs with SSE3 or AVX-512 are
				; expected to emit fistt; 64-bit runs are expected to emit cvttsd2si
				; (vcvttsd2si with AVX-512).
				; CHECK-LABEL: d_to_s64
				; X87: fnstcw
				; X87: fist
				; SSE2_32: fnstcw
				; SSE2_32: fist
				; SSE2_64: cvttsd2si
				; SSE3_32: fistt
				; SSE3_64: cvttsd2si
				; AVX512_32: fistt
				; AVX512_64: vcvttsd2si
				; CHECK: ret
				define i64 @d_to_s64(double %a) nounwind {
				%conv = fptosi double %a to i64
				ret i64 %conv
				}

				; fptoui x86_fp80 -> i64. All configurations are expected to emit an fsub
				; and a fist-family store; the x87-only and SSE2 runs are additionally
				; expected to emit fnstcw (in either order relative to the fsub).
				; CHECK-LABEL: x_to_u64
				; CHECK-DAG: fsub
				; X87-DAG: fnstcw
				; SSE2_32-DAG: fnstcw
				; SSE2_64-DAG: fnstcw
				; CHECK: fist
				; CHECK: ret
				define i64 @x_to_u64(x86_fp80 %a) nounwind {
				%conv = fptoui x86_fp80 %a to i64
				ret i64 %conv
				}

				; fptosi x86_fp80 -> i64. The x87-only and SSE2 runs are expected to emit
				; fnstcw followed by fist; the SSE3 and AVX-512 runs are expected to emit
				; fistt, on both 32-bit and 64-bit targets.
				; CHECK-LABEL: x_to_s64
				; X87: fnstcw
				; X87: fist
				; SSE2_32: fnstcw
				; SSE2_32: fist
				; SSE2_64: fnstcw
				; SSE2_64: fist
				; SSE3_32: fistt
				; SSE3_64: fistt
				; AVX512_32: fistt
				; AVX512_64: fistt
				; CHECK: ret
				define i64 @x_to_s64(x86_fp80 %a) nounwind {
				%conv = fptosi x86_fp80 %a to i64
				ret i64 %conv
				}

test/CodeGen/X86/win_ftol2.ll

	; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=FTOL			; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=generic | FileCheck %s -check-prefix=COMPILERRT
	; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT			; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
	; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT			; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
	; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT			; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
				mkuper (inline comment): What happened to this RUN line?
				mbodart (author, inline reply): Oops, forgot to reset it before submitting the review. I was playing around with different ways to disable this test. It's left as RUN in my current version.
	; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT			; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
	; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT			; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
	; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2			; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=COMPILERRT

	; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This			; This test originally used the FTOL and FTOL_2 checks for the i686-pc-win32
	; function has a nonstandard calling convention: the input value is expected on			; triples, under the assumption that Win32 targets should use the MSVCRT
	; the x87 stack instead of the callstack. The input value is popped by the			; _ftol2 runtime function for fptoui to i64. That usage was incorrect,
	; callee. Mingw32 uses normal cdecl compiler-rt functions.			; as _ftol2 performs conversion to signed i64. As of the compiler fix,
				; the FTOL/FTOL_2 checks are no longer used, which basically renders
				; this test meaningless.

	define i64 @double_ui64(double %x) nounwind {			define i64 @double_ui64(double %x) nounwind {
	entry:			entry:
	; COMPILERRT: @double_ui64			; COMPILERRT: @double_ui64
	; COMPILERRT-NOT: calll __ftol2			; COMPILERRT-NOT: calll __ftol2
	; FTOL: @double_ui64			; FTOL: @double_ui64
	; FTOL: fldl			; FTOL: fldl
	; FTOL: calll __ftol2			; FTOL: calll __ftol2
	▲ Show 20 Lines • Show All 146 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86] -- Fix fptoui i64 conversions for IA32 (performance and correctness)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 32324

lib/Target/X86/X86ISelLowering.h

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/pr17631.ll

test/CodeGen/X86/scalar-fp-to-i64.ll

test/CodeGen/X86/win_ftol2.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] -- Fix fptoui i64 conversions for IA32 (performance and correctness)
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 32324

lib/Target/X86/X86ISelLowering.h

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/pr17631.ll

test/CodeGen/X86/scalar-fp-to-i64.ll

test/CodeGen/X86/win_ftol2.ll

[X86] -- Fix fptoui i64 conversions for IA32 (performance and correctness)
ClosedPublic