This is an archive of the discontinued LLVM Phabricator instance.

Masked load/store for types that require legalization.
Needs ReviewPublic

Authored by delena on Jan 14 2015, 6:14 AM.

Download Raw Diff

Details

Reviewers

nadav
mzolotukhin
aschwaighofer
steven_wu

Summary

In this big patch I'm solving two things. (If you say that it is too big and not reviewable, I'll split it into two, but these things are connected.)

The current lowering of masked load/store for <2 x i32> and <2 x f32> is incorrect, and I'm solving this in the type legalizer and a subsequent "combine" in X86.
I added cost estimation for masked operations, which shows that (1) masked loads/stores for these vector types are very expensive (due to expanding loads and truncating stores), and (2) the maskmov operation itself is not as cheap as a vector load/store.

Diff Detail

Event Timeline

delena updated this revision to Diff 18149.Jan 14 2015, 6:14 AM

delena retitled this revision from to Masked load/store for types that require legalization..

delena updated this object.

delena edited the test plan for this revision. (Show Details)

delena added reviewers: nadav, aschwaighofer, mzolotukhin, steven_wu.

delena set the repository for this revision to rL LLVM.

delena added a subscriber: Unknown Object (MLST).

Revision Contents

Path

Size

include/

llvm/

Analysis/

TargetTransformInfo.h

5 lines

CodeGen/

SelectionDAG.h

6 lines

SelectionDAGNodes.h

31 lines

lib/

Analysis/

TargetTransformInfo.cpp

12 lines

CodeGen/

BasicTargetTransformInfo.cpp

4 lines

SelectionDAG/

DAGCombiner.cpp

14 lines

LegalizeIntegerTypes.cpp

26 lines

LegalizeTypes.h

1 line

LegalizeVectorTypes.cpp

62 lines

SelectionDAG.cpp

13 lines

SelectionDAGBuilder.cpp

6 lines

Target/

X86/

X86ISelLowering.cpp

164 lines

X86TargetTransformInfo.cpp

46 lines

Transforms/

Vectorize/

LoopVectorize.cpp

6 lines

test/

Analysis/

CostModel/

X86/

masked-intrinsic-cost.ll

89 lines

CodeGen/

X86/

masked_memop.ll

7 lines

Diff 18149

include/llvm/Analysis/TargetTransformInfo.h

Show First 20 Lines • Show All 391 Lines • ▼ Show 20 Lines	public:
virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,		virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index = -1) const;		unsigned Index = -1) const;

/// \return The cost of Load and Store instructions.		/// \return The cost of Load and Store instructions.
virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,		virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,		unsigned Alignment,
unsigned AddressSpace) const;		unsigned AddressSpace) const;

		/// \return The cost of masked Load and Store instructions.
		virtual unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
		unsigned Alignment,
		unsigned AddressSpace) const;

/// \brief Calculate the cost of performing a vector reduction.		/// \brief Calculate the cost of performing a vector reduction.
///		///
/// This is the cost of reducing the vector value of type \p Ty to a scalar		/// This is the cost of reducing the vector value of type \p Ty to a scalar
/// value using the operation denoted by \p Opcode. The form of the reduction		/// value using the operation denoted by \p Opcode. The form of the reduction
/// can either be a pairwise reduction or a reduction that splits the vector		/// can either be a pairwise reduction or a reduction that splits the vector
/// at every reduction level.		/// at every reduction level.
///		///
/// Pairwise:		/// Pairwise:
▲ Show 20 Lines • Show All 49 Lines • Show Last 20 Lines

include/llvm/CodeGen/SelectionDAG.h

Show First 20 Lines • Show All 861 Lines • ▼ Show 20 Lines	SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr,
unsigned Alignment,		unsigned Alignment,
const AAMDNodes &AAInfo = AAMDNodes());		const AAMDNodes &AAInfo = AAMDNodes());
SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr,		SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr,
EVT TVT, MachineMemOperand *MMO);		EVT TVT, MachineMemOperand *MMO);
SDValue getIndexedStore(SDValue OrigStoe, SDLoc dl, SDValue Base,		SDValue getIndexedStore(SDValue OrigStoe, SDLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM);		SDValue Offset, ISD::MemIndexedMode AM);

SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,		SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
SDValue Mask, SDValue Src0, MachineMemOperand *MMO);		SDValue Mask, SDValue Src0, EVT MemVT,
		MachineMemOperand *MMO, ISD::LoadExtType);
SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,		SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);		SDValue Ptr, SDValue Mask, EVT MemVT,
		MachineMemOperand *MMO, bool IsTrunc);
/// getSrcValue - Construct a node to track a Value* through the backend.		/// getSrcValue - Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);		SDValue getSrcValue(const Value *v);

/// getMDNode - Return an MDNodeSDNode which holds an MDNode.		/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
SDValue getMDNode(const MDNode *MD);		SDValue getMDNode(const MDNode *MD);

/// getAddrSpaceCast - Return an AddrSpaceCastSDNode.		/// getAddrSpaceCast - Return an AddrSpaceCastSDNode.
SDValue getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,		SDValue getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
▲ Show 20 Lines • Show All 404 Lines • Show Last 20 Lines

include/llvm/CodeGen/SelectionDAGNodes.h

Show First 20 Lines • Show All 1,964 Lines • ▼ Show 20 Lines	public:
}		}
};		};

/// MaskedLoadSDNode - This class is used to represent an MLOAD node		/// MaskedLoadSDNode - This class is used to represent an MLOAD node
///		///
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {		class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
public:		public:
friend class SelectionDAG;		friend class SelectionDAG;
MaskedLoadSDNode(unsigned Order, DebugLoc dl,		MaskedLoadSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
SDValue *Operands, unsigned numOperands,		unsigned numOperands, SDVTList VTs, ISD::LoadExtType ETy,
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)		EVT MemVT, MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,		: MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
VTs, MemVT, MMO)		VTs, MemVT, MMO) {
{}		SubclassData |= (unsigned short)ETy;
		}

		ISD::LoadExtType getExtensionType() const {
		return ISD::LoadExtType(SubclassData & 3);
		}
const SDValue &getSrc0() const { return getOperand(3); }		const SDValue &getSrc0() const { return getOperand(3); }
static bool classof(const SDNode *N) {		static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;		return N->getOpcode() == ISD::MLOAD;
}		}
};		};

/// MaskedStoreSDNode - This class is used to represent an MSTORE node		/// MaskedStoreSDNode - This class is used to represent an MSTORE node
///		///
class MaskedStoreSDNode : public MaskedLoadStoreSDNode {		class MaskedStoreSDNode : public MaskedLoadStoreSDNode {

public:		public:
friend class SelectionDAG;		friend class SelectionDAG;
MaskedStoreSDNode(unsigned Order, DebugLoc dl,		MaskedStoreSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
SDValue *Operands, unsigned numOperands,		unsigned numOperands, SDVTList VTs, bool isTrunc, EVT MemVT,
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)		MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,		: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
VTs, MemVT, MMO)		VTs, MemVT, MMO) {
{}		SubclassData |= (unsigned short)isTrunc;
		}
		/// isTruncatingStore - Return true if the op does a truncation before store.
		/// For integers this is the same as doing a TRUNCATE and storing the result.
		/// For floats, it is the same as doing an FP_ROUND and storing the result.
		bool isTruncatingStore() const { return SubclassData & 1; }

const SDValue &getData() const { return getOperand(3); }		const SDValue &getValue() const { return getOperand(3); }

static bool classof(const SDNode *N) {		static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSTORE;		return N->getOpcode() == ISD::MSTORE;
}		}
};		};

/// MachineSDNode - An SDNode that represents everything that will be needed		/// MachineSDNode - An SDNode that represents everything that will be needed
/// to construct a MachineInstr. These nodes are created during the		/// to construct a MachineInstr. These nodes are created during the
▲ Show 20 Lines • Show All 179 Lines • Show Last 20 Lines

lib/Analysis/TargetTransformInfo.cpp

Show First 20 Lines • Show All 215 Lines • ▼ Show 20 Lines
}		}

unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,		unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment,		unsigned Alignment,
unsigned AddressSpace) const {		unsigned AddressSpace) const {
return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);		return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
}		}

unsigned		unsigned
		TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
		unsigned Alignment,
		unsigned AddressSpace) const {
		return PrevTTI->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
		}

		unsigned
TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID,		TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID,
Type *RetTy,		Type *RetTy,
ArrayRef<Type *> Tys) const {		ArrayRef<Type *> Tys) const {
return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys);		return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys);
}		}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {		unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return PrevTTI->getNumberOfParts(Tp);		return PrevTTI->getNumberOfParts(Tp);
▲ Show 20 Lines • Show All 384 Lines • ▼ Show 20 Lines	unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
return 1;		return 1;
}		}

unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,		unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const override {		unsigned AddressSpace) const override {
return 1;		return 1;
}		}

		unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
		unsigned AddressSpace) const override {
		return 1;
		}

unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,		unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
ArrayRef<Type*> Tys) const override {		ArrayRef<Type*> Tys) const override {
return 1;		return 1;
}		}

unsigned getNumberOfParts(Type *Tp) const override {		unsigned getNumberOfParts(Type *Tp) const override {
return 0;		return 0;
}		}
Show All 24 Lines

lib/CodeGen/BasicTargetTransformInfo.cpp

Show First 20 Lines • Show All 576 Lines • ▼ Show 20 Lines	unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::round: ISD = ISD::FROUND; break;		case Intrinsic::round: ISD = ISD::FROUND; break;
case Intrinsic::pow: ISD = ISD::FPOW; break;		case Intrinsic::pow: ISD = ISD::FPOW; break;
case Intrinsic::fma: ISD = ISD::FMA; break;		case Intrinsic::fma: ISD = ISD::FMA; break;
case Intrinsic::fmuladd: ISD = ISD::FMA; break;		case Intrinsic::fmuladd: ISD = ISD::FMA; break;
// FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.		// FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
case Intrinsic::lifetime_start:		case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:		case Intrinsic::lifetime_end:
return 0;		return 0;
		case Intrinsic::masked_store:
		return TopTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
		case Intrinsic::masked_load:
		return TopTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
}		}

const TargetLoweringBase *TLI = getTLI();		const TargetLoweringBase *TLI = getTLI();
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);		std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy);

if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {		if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
// The operation is legal. Assume it costs 1.		// The operation is legal. Assume it costs 1.
// If the type is split to multiple registers, assume that there is some		// If the type is split to multiple registers, assume that there is some
▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,836 Lines • ▼ Show 20 Lines

SDValue DAGCombiner::visitMSTORE(SDNode *N) {		SDValue DAGCombiner::visitMSTORE(SDNode *N) {

if (Level >= AfterLegalizeTypes)		if (Level >= AfterLegalizeTypes)
return SDValue();		return SDValue();

MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);		MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();		SDValue Mask = MST->getMask();
SDValue Data = MST->getData();		SDValue Data = MST->getValue();
SDLoc DL(N);		SDLoc DL(N);

// If the MSTORE data type requires splitting and the mask is provided by a		// If the MSTORE data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This		// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons		// prevents the type legalizer from unrolling SETCC into scalar comparisons
// and enables future optimizations (e.g. min/max pattern matching on X86).		// and enables future optimizations (e.g. min/max pattern matching on X86).
if (Mask.getOpcode() == ISD::SETCC) {		if (Mask.getOpcode() == ISD::SETCC) {

Show All 26 Lines	if (Mask.getOpcode() == ISD::SETCC) {
SDValue DataLo, DataHi;		SDValue DataLo, DataHi;
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);		std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

MachineMemOperand *MMO = DAG.getMachineFunction().		MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MST->getPointerInfo(),		getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),		MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, MST->getAAInfo(), MST->getRanges());		Alignment, MST->getAAInfo(), MST->getRanges());

Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);		Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
		MST->isTruncatingStore());

unsigned IncrementSize = LoMemVT.getSizeInBits()/8;		unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,		Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));		DAG.getConstant(IncrementSize, Ptr.getValueType()));

MMO = DAG.getMachineFunction().		MMO = DAG.getMachineFunction().
getMachineMemOperand(MST->getPointerInfo(),		getMachineMemOperand(MST->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),		MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, MST->getAAInfo(),		SecondHalfAlignment, MST->getAAInfo(),
MST->getRanges());		MST->getRanges());

Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);		Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
		MST->isTruncatingStore());

AddToWorklist(Lo.getNode());		AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());		AddToWorklist(Hi.getNode());

return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);		return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}		}
return SDValue();		return SDValue();
}		}
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines	if (Mask.getOpcode() == ISD::SETCC) {
EVT LoMemVT, HiMemVT;		EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);		std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

MachineMemOperand *MMO = DAG.getMachineFunction().		MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),		getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),		MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());		Alignment, MLD->getAAInfo(), MLD->getRanges());

Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);		Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
		ISD::NON_EXTLOAD);

unsigned IncrementSize = LoMemVT.getSizeInBits()/8;		unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,		Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));		DAG.getConstant(IncrementSize, Ptr.getValueType()));

MMO = DAG.getMachineFunction().		MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),		getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),		MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());		SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());

Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);		Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
		ISD::NON_EXTLOAD);

AddToWorklist(Lo.getNode());		AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());		AddToWorklist(Hi.getNode());

// Build a factor node to remember that this load is independent of the		// Build a factor node to remember that this load is independent of the
// other one.		// other one.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),		Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
Hi.getValue(1));		Hi.getValue(1));
▲ Show 20 Lines • Show All 7,806 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Show First 20 Lines • Show All 450 Lines • ▼ Show 20 Lines	SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {

// Legalized the chain result - switch anything that used the old chain to		// Legalized the chain result - switch anything that used the old chain to
// use the new one.		// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));		ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;		return Res;
}		}

SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {		SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {

EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));		EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());		SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT);
SDLoc dl(N);

MachineMemOperand *MMO = DAG.getMachineFunction().		SDValue Mask = N->getMask();
getMachineMemOperand(N->getPointerInfo(),		EVT NewMaskVT = getSetCCResultType(NVT);
MachineMemOperand::MOLoad, NVT.getStoreSize(),		if (NewMaskVT != N->getMask().getValueType())
N->getAlignment(), N->getAAInfo(), N->getRanges());		Mask = PromoteTargetBoolean(Mask, NewMaskVT);
		SDLoc dl(N);

SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),		SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
ExtMask, ExtSrc0, MMO);		Mask, ExtSrc0, N->getMemoryVT(),
		N->getMemOperand(), ISD::SEXTLOAD);
// Legalized the chain result - switch anything that used the old chain to		// Legalized the chain result - switch anything that used the old chain to
// use the new one.		// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));		ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;		return Res;
}		}
/// Promote the overflow flag of an overflowing arithmetic node.		/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {		SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
// Simply change the return type of the boolean result.		// Simply change the return type of the boolean result.
▲ Show 20 Lines • Show All 633 Lines • ▼ Show 20 Lines	SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
// Truncate the value and store the result.		// Truncate the value and store the result.
return DAG.getTruncStore(Ch, dl, Val, Ptr,		return DAG.getTruncStore(Ch, dl, Val, Ptr,
N->getMemoryVT(), N->getMemOperand());		N->getMemoryVT(), N->getMemOperand());
}		}

SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){		SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){

assert(OpNo == 2 && "Only know how to promote the mask!");		assert(OpNo == 2 && "Only know how to promote the mask!");
SDValue DataOp = N->getData();		SDValue DataOp = N->getValue();
EVT DataVT = DataOp.getValueType();		EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();		SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();		EVT MaskVT = Mask.getValueType();
SDLoc dl(N);		SDLoc dl(N);

		bool TruncateStore = false;
if (!TLI.isTypeLegal(DataVT)) {		if (!TLI.isTypeLegal(DataVT)) {
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {		if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
DataOp = GetPromotedInteger(DataOp);		DataOp = GetPromotedInteger(DataOp);
Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());		Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
		TruncateStore = true;
}		}
else {		else {
assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&		assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
"Unexpected data legalization in MSTORE");		"Unexpected data legalization in MSTORE");
DataOp = GetWidenedVector(DataOp);		DataOp = GetWidenedVector(DataOp);

if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)		if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
Mask = GetWidenedVector(Mask);		Mask = GetWidenedVector(Mask);
Show All 13 Lines	else {
Ops[i] = ZeroVal;		Ops[i] = ZeroVal;

Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);		Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
}		}
}		}
}		}
else		else
Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());		Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());		return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
NewOps[2] = Mask;		N->getMemoryVT(), N->getMemOperand(),
NewOps[3] = DataOp;		TruncateStore);
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}		}

SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){		SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
assert(OpNo == 2 && "Only know how to promote the mask!");		assert(OpNo == 2 && "Only know how to promote the mask!");
EVT DataVT = N->getValueType(0);		EVT DataVT = N->getValueType(0);
SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);		SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());		SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[OpNo] = Mask;		NewOps[OpNo] = Mask;
▲ Show 20 Lines • Show All 1,992 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/LegalizeTypes.h

Show First 20 Lines • Show All 653 Lines • ▼ Show 20 Lines	private:
// Widen Vector Operand.		// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned OpNo);		bool WidenVectorOperand(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_BITCAST(SDNode *N);		SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);		SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTEND(SDNode *N);		SDValue WidenVecOp_EXTEND(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);		SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);		SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);		SDValue WidenVecOp_STORE(SDNode* N);
		SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);		SDValue WidenVecOp_SETCC(SDNode* N);

SDValue WidenVecOp_Convert(SDNode *N);		SDValue WidenVecOp_Convert(SDNode *N);

//===--------------------------------------------------------------------===//		//===--------------------------------------------------------------------===//
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp		// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
//===--------------------------------------------------------------------===//		//===--------------------------------------------------------------------===//

▲ Show 20 Lines • Show All 105 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Show First 20 Lines • Show All 986 Lines • ▼ Show 20 Lines	void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
EVT LoVT, HiVT;		EVT LoVT, HiVT;
SDLoc dl(MLD);		SDLoc dl(MLD);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));		std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));

SDValue Ch = MLD->getChain();		SDValue Ch = MLD->getChain();
SDValue Ptr = MLD->getBasePtr();		SDValue Ptr = MLD->getBasePtr();
SDValue Mask = MLD->getMask();		SDValue Mask = MLD->getMask();
unsigned Alignment = MLD->getOriginalAlignment();		unsigned Alignment = MLD->getOriginalAlignment();
		ISD::LoadExtType ExtType = MLD->getExtensionType();

// if Alignment is equal to the vector size,		// if Alignment is equal to the vector size,
// take the half of it for the second part		// take the half of it for the second part
unsigned SecondHalfAlignment =		unsigned SecondHalfAlignment =
(Alignment == MLD->getValueType(0).getSizeInBits()/8) ?		(Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
Alignment/2 : Alignment;		Alignment/2 : Alignment;

SDValue MaskLo, MaskHi;		SDValue MaskLo, MaskHi;
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);		std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);

EVT MemoryVT = MLD->getMemoryVT();		EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;		EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);		std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

SDValue Src0 = MLD->getSrc0();		SDValue Src0 = MLD->getSrc0();
SDValue Src0Lo, Src0Hi;		SDValue Src0Lo, Src0Hi;
std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);		std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);

MachineMemOperand *MMO = DAG.getMachineFunction().		MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),		getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),		MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
Alignment, MLD->getAAInfo(), MLD->getRanges());		Alignment, MLD->getAAInfo(), MLD->getRanges());

Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO);		Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
		ExtType);

unsigned IncrementSize = LoMemVT.getSizeInBits()/8;		unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,		Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));		DAG.getConstant(IncrementSize, Ptr.getValueType()));

MMO = DAG.getMachineFunction().		MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),		getMachineMemOperand(MLD->getPointerInfo(),
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),		MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());		SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());

Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO);		Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
		ExtType);


// Build a factor node to remember that this load is independent of the		// Build a factor node to remember that this load is independent of the
// other one.		// other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),		Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));		Hi.getValue(1));

// Legalized the chain result - switch anything that used the old chain to		// Legalized the chain result - switch anything that used the old chain to
▲ Show 20 Lines • Show All 421 Lines • ▼ Show 20 Lines	return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
MachinePointerInfo(), EltVT, false, false, false, 0);		MachinePointerInfo(), EltVT, false, false, false, 0);
}		}

SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,		SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {		unsigned OpNo) {
SDValue Ch = N->getChain();		SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();		SDValue Ptr = N->getBasePtr();
SDValue Mask = N->getMask();		SDValue Mask = N->getMask();
SDValue Data = N->getData();		SDValue Data = N->getValue();
EVT MemoryVT = N->getMemoryVT();		EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();		unsigned Alignment = N->getOriginalAlignment();
SDLoc DL(N);		SDLoc DL(N);

EVT LoMemVT, HiMemVT;		EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);		std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

SDValue DataLo, DataHi;		SDValue DataLo, DataHi;
GetSplitVector(Data, DataLo, DataHi);		GetSplitVector(Data, DataLo, DataHi);
SDValue MaskLo, MaskHi;		SDValue MaskLo, MaskHi;
GetSplitVector(Mask, MaskLo, MaskHi);		GetSplitVector(Mask, MaskLo, MaskHi);

// if Alignment is equal to the vector size,		// if Alignment is equal to the vector size,
// take the half of it for the second part		// take the half of it for the second part
unsigned SecondHalfAlignment =		unsigned SecondHalfAlignment =
(Alignment == Data->getValueType(0).getSizeInBits()/8) ?		(Alignment == Data->getValueType(0).getSizeInBits()/8) ?
Alignment/2 : Alignment;		Alignment/2 : Alignment;

SDValue Lo, Hi;		SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().		MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),		getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),		MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());		Alignment, N->getAAInfo(), N->getRanges());

Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO);		Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
		N->isTruncatingStore());

unsigned IncrementSize = LoMemVT.getSizeInBits()/8;		unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,		Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, Ptr.getValueType()));		DAG.getConstant(IncrementSize, Ptr.getValueType()));

MMO = DAG.getMachineFunction().		MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),		getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),		MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, N->getAAInfo(), N->getRanges());		SecondHalfAlignment, N->getAAInfo(), N->getRanges());

Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO);		Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
		N->isTruncatingStore());


// Build a factor node to remember that this store is independent of the		// Build a factor node to remember that this store is independent of the
// other one.		// other one.
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);		return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);

}		}

▲ Show 20 Lines • Show All 895 Lines • ▼ Show 20 Lines
}		}

SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {		SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {

EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));		EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
SDValue Mask = N->getMask();		SDValue Mask = N->getMask();
EVT MaskVT = Mask.getValueType();		EVT MaskVT = Mask.getValueType();
SDValue Src0 = GetWidenedVector(N->getSrc0());		SDValue Src0 = GetWidenedVector(N->getSrc0());
		ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);		SDLoc dl(N);

if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)		if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
Mask = GetWidenedVector(Mask);		Mask = GetWidenedVector(Mask);
else {		else {
EVT BoolVT = getSetCCResultType(WidenVT);		EVT BoolVT = getSetCCResultType(WidenVT);

// We can't use ModifyToType() because we should fill the mask with		// We can't use ModifyToType() because we should fill the mask with
// zeroes		// zeroes
unsigned WidenNumElts = BoolVT.getVectorNumElements();		unsigned WidenNumElts = BoolVT.getVectorNumElements();
unsigned MaskNumElts = MaskVT.getVectorNumElements();		unsigned MaskNumElts = MaskVT.getVectorNumElements();

unsigned NumConcat = WidenNumElts / MaskNumElts;		unsigned NumConcat = WidenNumElts / MaskNumElts;
SmallVector<SDValue, 16> Ops(NumConcat);		SmallVector<SDValue, 16> Ops(NumConcat);
SDValue ZeroVal = DAG.getConstant(0, MaskVT);		SDValue ZeroVal = DAG.getConstant(0, MaskVT);
Ops[0] = Mask;		Ops[0] = Mask;
for (unsigned i = 1; i != NumConcat; ++i)		for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = ZeroVal;		Ops[i] = ZeroVal;

Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);		Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
}		}

// Rebuild memory operand because MemoryVT was changed
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOLoad, WidenVT.getStoreSize(),
N->getAlignment(), N->getAAInfo(), N->getRanges());

SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),		SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, MMO);		Mask, Src0, N->getMemoryVT(),
		N->getMemOperand(), ExtType);
// Legalized the chain result - switch anything that used the old chain to		// Legalized the chain result - switch anything that used the old chain to
// use the new one.		// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));		ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;		return Res;
}		}

SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {		SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));		EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
▲ Show 20 Lines • Show All 135 Lines • ▼ Show 20 Lines
#endif		#endif
llvm_unreachable("Do not know how to widen this operator's operand!");		llvm_unreachable("Do not know how to widen this operator's operand!");

case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;		case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;		case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;		case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;		case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;		case ISD::STORE: Res = WidenVecOp_STORE(N); break;
		case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;		case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;

case ISD::ANY_EXTEND:		case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:		case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:		case ISD::ZERO_EXTEND:
Res = WidenVecOp_EXTEND(N);		Res = WidenVecOp_EXTEND(N);
break;		break;

▲ Show 20 Lines • Show All 182 Lines • ▼ Show 20 Lines	else
GenWidenVectorStores(StChain, ST);		GenWidenVectorStores(StChain, ST);

if (StChain.size() == 1)		if (StChain.size() == 1)
return StChain[0];		return StChain[0];
else		else
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);		return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}		}

		/// Widen the operands of a masked store whose stored value is legalized by
		/// widening.
		///
		/// \param N    the ISD::MSTORE node; it is cast to MaskedStoreSDNode.
		/// \param OpNo index of the operand being legalized. A value of 2 takes the
		///             same path as a mask whose type action is TypeWidenVector.
		/// \returns a masked store of the widened value, guarded by a mask with the
		///          same number of elements as the widened value.
		SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
		  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
		  SDValue Mask = MST->getMask();
		  EVT MaskVT = Mask.getValueType();
		  SDValue StVal = MST->getValue();
		  // Widen the value
		  SDValue WideVal = GetWidenedVector(StVal);
		  SDLoc dl(N);

		  if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
		    Mask = GetWidenedVector(Mask);
		  else {
		    // The mask should be widened as well
		    EVT BoolVT = getSetCCResultType(WideVal.getValueType());
		    // We can't use ModifyToType() because we should fill the mask with
		    // zeroes
		    unsigned WidenNumElts = BoolVT.getVectorNumElements();
		    unsigned MaskNumElts = MaskVT.getVectorNumElements();

		    // The original mask occupies the first chunk; every remaining chunk is
		    // an all-zero vector of the original mask type.
		    unsigned NumConcat = WidenNumElts / MaskNumElts;
		    SmallVector<SDValue, 16> Ops(NumConcat);
		    SDValue ZeroVal = DAG.getConstant(0, MaskVT);
		    Ops[0] = Mask;
		    for (unsigned i = 1; i != NumConcat; ++i)
		      Ops[i] = ZeroVal;

		    Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
		  }
		  assert(Mask.getValueType().getVectorNumElements() ==
		         WideVal.getValueType().getVectorNumElements() &&
		         "Mask and data vectors should have the same number of elements");
		  // NOTE(review): the truncating flag is hard-coded to false here, whereas
		  // the split path forwards N->isTruncatingStore() - confirm this is intended.
		  return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
		                            Mask, MST->getMemoryVT(), MST->getMemOperand(),
		                            false);
		}

SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {		SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
SDValue InOp0 = GetWidenedVector(N->getOperand(0));		SDValue InOp0 = GetWidenedVector(N->getOperand(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(1));		SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDLoc dl(N);		SDLoc dl(N);

// WARNING: In this code we widen the compare instruction with garbage.		// WARNING: In this code we widen the compare instruction with garbage.
// This garbage may contain denormal floats which may be slow. Is this a real		// This garbage may contain denormal floats which may be slow. Is this a real
// concern ? Should we zero the unused lanes if this is a float compare ?		// concern ? Should we zero the unused lanes if this is a float compare ?
▲ Show 20 Lines • Show All 485 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Show First 20 Lines • Show All 4,918 Lines • ▼ Show 20 Lines	SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(),
ST->getMemOperand());		ST->getMemOperand());
CSEMap.InsertNode(N, IP);		CSEMap.InsertNode(N, IP);
InsertNode(N);		InsertNode(N);
return SDValue(N, 0);		return SDValue(N, 0);
}		}

SDValue		SDValue
SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,		SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
SDValue Ptr, SDValue Mask, SDValue Src0,		SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
MachineMemOperand *MMO) {		MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {

SDVTList VTs = getVTList(VT, MVT::Other);		SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };		SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;		FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);		AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());		ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,		ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
MMO->isVolatile(),		MMO->isVolatile(),
MMO->isNonTemporal(),		MMO->isNonTemporal(),
MMO->isInvariant()));		MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());		ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;		void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {		if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);		cast<MaskedLoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);		return SDValue(E, 0);
}		}
SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),		SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
dl.getDebugLoc(), Ops, 4, VTs,		dl.getDebugLoc(), Ops, 4, VTs,
VT, MMO);		ExtTy, MemVT, MMO);
CSEMap.InsertNode(N, IP);		CSEMap.InsertNode(N, IP);
InsertNode(N);		InsertNode(N);
return SDValue(N, 0);		return SDValue(N, 0);
}		}

SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,		SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) {		SDValue Ptr, SDValue Mask, EVT MemVT,
		MachineMemOperand *MMO, bool isTrunc) {
assert(Chain.getValueType() == MVT::Other &&		assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");		"Invalid chain type");
EVT VT = Val.getValueType();		EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);		SDVTList VTs = getVTList(MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Val };		SDValue Ops[] = { Chain, Ptr, Mask, Val };
FoldingSetNodeID ID;		FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);		AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());		ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),		ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));		MMO->isNonTemporal(), MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());		ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;		void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {		if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);		cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);		return SDValue(E, 0);
}		}
SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),		SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
dl.getDebugLoc(), Ops, 4,		dl.getDebugLoc(), Ops, 4,
VTs, VT, MMO);		VTs, isTrunc, MemVT, MMO);
CSEMap.InsertNode(N, IP);		CSEMap.InsertNode(N, IP);
InsertNode(N);		InsertNode(N);
return SDValue(N, 0);		return SDValue(N, 0);
}		}

SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,		SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
SDValue Chain, SDValue Ptr,		SDValue Chain, SDValue Ptr,
SDValue SV,		SDValue SV,
▲ Show 20 Lines • Show All 1,882 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,686 Lines • ▼ Show 20 Lines	void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
AAMDNodes AAInfo;		AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);		I.getAAMetadata(AAInfo);

MachineMemOperand *MMO =		MachineMemOperand *MMO =
DAG.getMachineFunction().		DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(PtrOperand),		getMachineMemOperand(MachinePointerInfo(PtrOperand),
MachineMemOperand::MOStore, VT.getStoreSize(),		MachineMemOperand::MOStore, VT.getStoreSize(),
Alignment, AAInfo);		Alignment, AAInfo);
SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO);		SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
		MMO, false);
DAG.setRoot(StoreNode);		DAG.setRoot(StoreNode);
setValue(&I, StoreNode);		setValue(&I, StoreNode);
}		}

void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {		void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
SDLoc sdl = getCurSDLoc();		SDLoc sdl = getCurSDLoc();

// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)		// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
Show All 22 Lines	void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
}		}

MachineMemOperand *MMO =		MachineMemOperand *MMO =
DAG.getMachineFunction().		DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(PtrOperand),		getMachineMemOperand(MachinePointerInfo(PtrOperand),
MachineMemOperand::MOLoad, VT.getStoreSize(),		MachineMemOperand::MOLoad, VT.getStoreSize(),
Alignment, AAInfo, Ranges);		Alignment, AAInfo, Ranges);

SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO);		SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
		ISD::NON_EXTLOAD);
SDValue OutChain = Load.getValue(1);		SDValue OutChain = Load.getValue(1);
DAG.setRoot(OutChain);		DAG.setRoot(OutChain);
setValue(&I, Load);		setValue(&I, Load);
}		}

void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {		void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();		SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrder = I.getSuccessOrdering();		AtomicOrdering SuccessOrder = I.getSuccessOrdering();
▲ Show 20 Lines • Show All 4,237 Lines • Show Last 20 Lines

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 1,673 Lines • ▼ Show 20 Lines
setTargetDAGCombine(ISD::OR);		setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::AND);		setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::ADD);		setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::FADD);		setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);		setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMA);		setTargetDAGCombine(ISD::FMA);
setTargetDAGCombine(ISD::SUB);		setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::LOAD);		setTargetDAGCombine(ISD::LOAD);
		setTargetDAGCombine(ISD::MLOAD);
setTargetDAGCombine(ISD::STORE);		setTargetDAGCombine(ISD::STORE);
		setTargetDAGCombine(ISD::MSTORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);		setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);		setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);		setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);		setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);		setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);		setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);		setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);		setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
▲ Show 20 Lines • Show All 23,040 Lines • ▼ Show 20 Lines	if (RegVT.is256BitVector() && Subtarget->isUnalignedMem32Slow() &&
NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);		NewVec = Insert128BitVector(NewVec, Load1, 0, DAG, dl);
NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);		NewVec = Insert128BitVector(NewVec, Load2, NumElems/2, DAG, dl);
return DCI.CombineTo(N, NewVec, TF, true);		return DCI.CombineTo(N, NewVec, TF, true);
}		}

return SDValue();		return SDValue();
}		}

		/// PerformMLOADCombine - Resolve extending loads
		///
		/// Only sign-extending masked loads are handled; for any other extension
		/// type an empty SDValue is returned. A sign-extending load is rewritten as a
		/// non-extending masked load of a vector that has the memory element type and
		/// the same total bit width as the result, followed by X86ISD::VSEXT back to
		/// the original result type.
		///
		/// \param N         the ISD::MLOAD node.
		/// \param DAG       the DAG in which replacement nodes are created.
		/// \param DCI       combiner state used to install the replacement.
		/// \param Subtarget unused by this combine.
		static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
		                                   TargetLowering::DAGCombinerInfo &DCI,
		                                   const X86Subtarget *Subtarget) {
		  MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
		  if (Mld->getExtensionType() != ISD::SEXTLOAD)
		    return SDValue();

		  EVT VT = Mld->getValueType(0);
		  unsigned NumElems = VT.getVectorNumElements();
		  EVT LdVT = Mld->getMemoryVT();
		  SDLoc dl(Mld);

		  assert(LdVT != VT && "Cannot extend to the same type");
		  unsigned ToSz = VT.getVectorElementType().getSizeInBits();
		  unsigned FromSz = LdVT.getVectorElementType().getSizeInBits();
		  // From, To sizes and ElemCount must be pow of two
		  assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
		          "Unexpected size for extending masked load");

		  unsigned SizeRatio = ToSz / FromSz;
		  assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());

		  // Create a type on which we perform the shuffle
		  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
		                                   LdVT.getScalarType(), NumElems*SizeRatio);
		  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());

		  // Convert Src0 value
		  SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0());
		  if (Mld->getSrc0().getOpcode() != ISD::UNDEF) {
		    // Lane i takes sub-element i * SizeRatio of the bitcast pass-through
		    // value; the remaining lanes stay undefined (-1).
		    SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
		    for (unsigned i = 0; i != NumElems; ++i)
		      ShuffleVec[i] = i * SizeRatio;

		    // Can't shuffle using an illegal type.
		    // The lookup lives inside the assert so that builds without asserts do
		    // not end up with an unused local.
		    assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
		            "WideVecVT should be legal");
		    WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
		                                    DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
		  }
		  // Prepare the new mask
		  SDValue NewMask;
		  SDValue Mask = Mld->getMask();
		  if (Mask.getValueType() == VT) {
		    // Mask and original value have the same type
		    NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
		    SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
		    for (unsigned i = 0; i != NumElems; ++i)
		      ShuffleVec[i] = i * SizeRatio;
		    // Index NumElems*SizeRatio selects element 0 of the second shuffle
		    // operand, which is the all-zero constant: the extra lanes are disabled.
		    for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
		      ShuffleVec[i] = NumElems*SizeRatio;
		    NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
		                                   DAG.getConstant(0, WideVecVT),
		                                   &ShuffleVec[0]);
		  }
		  else {
		    assert(Mask.getValueType().getVectorElementType() == MVT::i1);
		    unsigned WidenNumElts = NumElems*SizeRatio;
		    unsigned MaskNumElts = VT.getVectorNumElements();
		    EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
		                                     WidenNumElts);

		    // Original i1 mask first, then all-zero chunks for the added lanes.
		    unsigned NumConcat = WidenNumElts / MaskNumElts;
		    SmallVector<SDValue, 16> Ops(NumConcat);
		    SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
		    Ops[0] = Mask;
		    for (unsigned i = 1; i != NumConcat; ++i)
		      Ops[i] = ZeroVal;

		    NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
		  }

		  SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
		                                     Mld->getBasePtr(), NewMask, WideSrc0,
		                                     Mld->getMemoryVT(), Mld->getMemOperand(),
		                                     ISD::NON_EXTLOAD);
		  SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
		  // Replace both results of N: the extended value and the chain of the new
		  // load.
		  return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);

		}
		/// PerformMSTORECombine - Resolve truncating stores
		///
		/// Non-truncating masked stores are left alone (an empty SDValue is
		/// returned). A truncating store is rewritten as a shuffle that moves
		/// sub-element i * SizeRatio of the bitcast value into lane i, followed by a
		/// non-truncating masked store whose mask has the added lanes set to zero.
		///
		/// \param N         the ISD::MSTORE node.
		/// \param DAG       the DAG in which replacement nodes are created.
		/// \param Subtarget unused by this combine.
		static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
		                                    const X86Subtarget *Subtarget) {
		  MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
		  if (!Mst->isTruncatingStore())
		    return SDValue();

		  EVT VT = Mst->getValue().getValueType();
		  unsigned NumElems = VT.getVectorNumElements();
		  EVT StVT = Mst->getMemoryVT();
		  SDLoc dl(Mst);

		  assert(StVT != VT && "Cannot truncate to the same type");
		  unsigned FromSz = VT.getVectorElementType().getSizeInBits();
		  unsigned ToSz = StVT.getVectorElementType().getSizeInBits();

		  // From, To sizes and ElemCount must be pow of two
		  assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
		          "Unexpected size for truncating masked store");
		  // We are going to use the original vector elt for storing.
		  // Accumulated smaller vector elements must be a multiple of the store size.
		  assert (((NumElems * FromSz) % ToSz) == 0 &&
		          "Unexpected ratio for truncating masked store");

		  unsigned SizeRatio = FromSz / ToSz;
		  assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());

		  // Create a type on which we perform the shuffle
		  EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
		                                   StVT.getScalarType(), NumElems*SizeRatio);

		  assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());

		  SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
		  SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
		  for (unsigned i = 0; i != NumElems; ++i)
		    ShuffleVec[i] = i * SizeRatio;

		  // Can't shuffle using an illegal type.
		  // The lookup lives inside the assert so that builds without asserts do not
		  // end up with an unused local.
		  assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) &&
		          "WideVecVT should be legal");

		  SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
		                                              DAG.getUNDEF(WideVecVT),
		                                              &ShuffleVec[0]);

		  SDValue NewMask;
		  SDValue Mask = Mst->getMask();
		  if (Mask.getValueType() == VT) {
		    // Mask and original value have the same type
		    NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
		    // ShuffleVec is reused: the first NumElems entries are rewritten with the
		    // same indices, and the tail now points at element 0 of the all-zero
		    // second operand instead of being undefined.
		    for (unsigned i = 0; i != NumElems; ++i)
		      ShuffleVec[i] = i * SizeRatio;
		    for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
		      ShuffleVec[i] = NumElems*SizeRatio;
		    NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
		                                   DAG.getConstant(0, WideVecVT),
		                                   &ShuffleVec[0]);
		  }
		  else {
		    assert(Mask.getValueType().getVectorElementType() == MVT::i1);
		    unsigned WidenNumElts = NumElems*SizeRatio;
		    unsigned MaskNumElts = VT.getVectorNumElements();
		    EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
		                                     WidenNumElts);

		    // Original i1 mask first, then all-zero chunks for the added lanes.
		    unsigned NumConcat = WidenNumElts / MaskNumElts;
		    SmallVector<SDValue, 16> Ops(NumConcat);
		    SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
		    Ops[0] = Mask;
		    for (unsigned i = 1; i != NumConcat; ++i)
		      Ops[i] = ZeroVal;

		    NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
		  }

		  return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
		                            NewMask, StVT, Mst->getMemOperand(), false);
		}
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.		/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,		static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {		const X86Subtarget *Subtarget) {
StoreSDNode *St = cast<StoreSDNode>(N);		StoreSDNode *St = cast<StoreSDNode>(N);
EVT VT = St->getValue().getValueType();		EVT VT = St->getValue().getValueType();
EVT StVT = St->getMemoryVT();		EVT StVT = St->getMemoryVT();
SDLoc dl(St);		SDLoc dl(St);
SDValue StoredVal = St->getOperand(1);		SDValue StoredVal = St->getOperand(1);
▲ Show 20 Lines • Show All 1,082 Lines • ▼ Show 20 Lines	SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);		case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:		case ISD::SHL:
case ISD::SRA:		case ISD::SRA:
case ISD::SRL: return PerformShiftCombine(N, DAG, DCI, Subtarget);		case ISD::SRL: return PerformShiftCombine(N, DAG, DCI, Subtarget);
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);		case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);		case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);		case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);		case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
		case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);		case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
		case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);		case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);		case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);		case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:		case X86ISD::FXOR:
case X86ISD::FOR: return PerformFORCombine(N, DAG);		case X86ISD::FOR: return PerformFORCombine(N, DAG);
case X86ISD::FMIN:		case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);		case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
case X86ISD::FAND: return PerformFANDCombine(N, DAG);		case X86ISD::FAND: return PerformFANDCombine(N, DAG);
▲ Show 20 Lines • Show All 846 Lines • Show Last 20 Lines

lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 90 Lines • ▼ Show 20 Lines	public:
unsigned getCastInstrCost(unsigned Opcode, Type *Dst,		unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src) const override;		Type *Src) const override;
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,		unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) const override;		Type *CondTy) const override;
unsigned getVectorInstrCost(unsigned Opcode, Type *Val,		unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) const override;		unsigned Index) const override;
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,		unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace) const override;		unsigned AddressSpace) const override;
		unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
		unsigned AddressSpace) const override;

unsigned getAddressComputationCost(Type *PtrTy,		unsigned getAddressComputationCost(Type *PtrTy,
bool IsComplex) const override;		bool IsComplex) const override;

unsigned getReductionCost(unsigned Opcode, Type *Ty,		unsigned getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) const override;		bool IsPairwiseForm) const override;

unsigned getIntImmCost(int64_t) const;		unsigned getIntImmCost(int64_t) const;
▲ Show 20 Lines • Show All 805 Lines • ▼ Show 20 Lines	unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
// On Sandybridge 256bit load/stores are double pumped		// On Sandybridge 256bit load/stores are double pumped
// (but not on Haswell).		// (but not on Haswell).
if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())		if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
Cost*=2;		Cost*=2;

return Cost;		return Cost;
}		}

		/// Estimate the cost of a masked load or store of type \p Src.
		///
		/// - A non-vector \p Src is priced as an ordinary memory operation.
		/// - If the masked operation is reported as not legal for \p Src, or the
		///   element count is not a power of two, the estimate is a scalarized
		///   sequence: one scalar memory op plus one branch per element, plus twice
		///   the scalarization overhead (data and mask).
		/// - Otherwise the cost is built from the type legalization result: a fixed
		///   surcharge for promotion or widening, plus a per-part cost of 4 without
		///   AVX-512 and 1 with it.
		///
		/// \param Opcode       Instruction::Load or Instruction::Store.
		/// \param Src          the accessed type.
		/// \param Alignment    forwarded to the ordinary memory-op cost queries.
		/// \param AddressSpace forwarded to the ordinary memory-op cost queries.
		unsigned X86TTI::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
		                                       unsigned Alignment,
		                                       unsigned AddressSpace) const {
		  VectorType *VTy = dyn_cast<VectorType>(Src);
		  if (!VTy)
		    // To calculate scalar take the regular cost, without mask
		    return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);

		  unsigned NumElem = VTy->getVectorNumElements();
		  if ((Opcode == Instruction::Load && !isLegalMaskedLoad(Src, 1)) ||
		      (Opcode == Instruction::Store && !isLegalMaskedStore(Src, 1)) ||
		      !isPowerOf2_32(NumElem)) {
		    // Scalarization
		    unsigned SplitCost = getScalarizationOverhead(Src,
		                                                  Opcode == Instruction::Load,
		                                                  Opcode==Instruction::Store);
		    unsigned Cost =
		      TargetTransformInfo::getMemoryOpCost(Opcode, VTy->getScalarType(),
		                                           Alignment, AddressSpace) +
		      getCFInstrCost(Instruction::Br);
		    // Take SplitCost*2 because we split data and mask
		    return NumElem * Cost + SplitCost*2;
		  }

		  // Legalize the type.
		  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
		  unsigned Cost = 0;
		  MVT SrcVT = TLI->getValueType(Src).getSimpleVT();
		  if (LT.second != SrcVT && LT.second.getVectorNumElements() == NumElem) {
		    // Promotion requires expand/truncate for data and for mask.
		    // Expand/truncate for data takes 1 cycle and 2 cycles for mask,
		    // because it should be filled with zeroes.
		    Cost += 3;
		  } else if (LT.second.getVectorNumElements() > NumElem) {
		    // Expanding requires fill mask with zeroes
		    Cost += 1;
		  }

		  if (!ST->hasAVX512())
		    return Cost + LT.first*4; // Each maskmov costs 4

		  return Cost+LT.first;
		}

unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {		unsigned X86TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
// Address computations in vectorized code with non-consecutive addresses will		// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the		// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting		// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.		// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;		unsigned NumVectorInstToHideOverhead = 10;

if (Ty->isVectorTy() && IsComplex)		if (Ty->isVectorTy() && IsComplex)
▲ Show 20 Lines • Show All 249 Lines • Show Last 20 Lines

lib/Transforms/Vectorize/LoopVectorize.cpp

Show First 20 Lines • Show All 6,073 Lines • ▼ Show 20 Lines	if (!ConsecutiveStride || ScalarAllocatedSize != VectorElementSize) {
Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);		Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),		Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);		Alignment, AS);
return Cost;		return Cost;
}		}

// Wide load/stores.		// Wide load/stores.
unsigned Cost = TTI.getAddressComputationCost(VectorTy);		unsigned Cost = TTI.getAddressComputationCost(VectorTy);
		if (Legal->isMaskRequired(I))
		Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment,
		AS);
		else
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);		Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS);

if (Reverse)		if (Reverse)
Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,		Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
VectorTy, 0);		VectorTy, 0);
return Cost;		return Cost;
}		}
case Instruction::ZExt:		case Instruction::ZExt:
case Instruction::SExt:		case Instruction::SExt:
▲ Show 20 Lines • Show All 228 Lines • Show Last 20 Lines

test/Analysis/CostModel/X86/masked-intrinsic-cost.ll

				; RUN: opt -S -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -cost-model -analyze < %s \| FileCheck %s -check-prefix=AVX2


				; Masked load of <2 x double>; the mask is an icmp eq of <2 x i64> against
				; zero and %dst is the pass-through value.
				; The cost model is expected to report 4 for the masked intrinsic call
				; (presumably one maskmov with no legalization overhead -- confirm against
				; the X86 cost model).
				; AVX2-LABEL: test1
				; AVX2: Found an estimated cost of 4 {{.*}}.masked
				define <2 x double> @test1(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
				%mask = icmp eq <2 x i64> %trigger, zeroinitializer
				%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
				ret <2 x double> %res
				}

				; Masked load of <4 x i32>; the mask is an icmp eq of <4 x i32> against
				; zero and %dst is the pass-through value.
				; The cost model is expected to report 4 for the masked intrinsic call
				; (presumably one maskmov with no legalization overhead -- confirm against
				; the X86 cost model).
				; AVX2-LABEL: test2
				; AVX2: Found an estimated cost of 4 {{.*}}.masked
				define <4 x i32> @test2(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
				%mask = icmp eq <4 x i32> %trigger, zeroinitializer
				%res = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
				ret <4 x i32> %res
				}

				; Masked store of <4 x i32>; the mask is an icmp eq of <4 x i32> against
				; zero.
				; The cost model is expected to report 4 for the masked intrinsic call,
				; the same value the <4 x i32> masked load in test2 expects.
				; AVX2-LABEL: test3
				; AVX2: Found an estimated cost of 4 {{.*}}.masked
				define void @test3(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
				%mask = icmp eq <4 x i32> %trigger, zeroinitializer
				call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
				ret void
				}

				; Masked load of <8 x float>; the mask is an icmp eq of <8 x i32> against
				; zero and %dst is the pass-through value.
				; The cost model is expected to report 4 for the masked intrinsic call
				; (presumably the 8-element vector still maps to a single maskmov --
				; confirm against the X86 cost model).
				; AVX2-LABEL: test4
				; AVX2: Found an estimated cost of 4 {{.*}}.masked
				define <8 x float> @test4(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
				%mask = icmp eq <8 x i32> %trigger, zeroinitializer
				%res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
				ret <8 x float> %res
				}

				; Masked store of <2 x float>; the mask is an icmp eq of <2 x i32> against
				; zero.
				; The cost model is expected to report 5 for the masked intrinsic call, one
				; more than the cases that expect 4 (presumably the extra unit is for
				; zero-filling the mask when the vector is widened -- confirm against the
				; X86 cost model).
				; AVX2-LABEL: test5
				; AVX2: Found an estimated cost of 5 {{.*}}.masked
				define void @test5(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
				%mask = icmp eq <2 x i32> %trigger, zeroinitializer
				call void @llvm.masked.store.v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
				ret void
				}

				; Masked store of <2 x i32>; the mask is an icmp eq of <2 x i32> against
				; zero.
				; The cost model is expected to report 7 for the masked intrinsic call,
				; three more than the cases that expect 4 (presumably the extra units are
				; for promoting the data and the mask -- confirm against the X86 cost
				; model).
				; AVX2-LABEL: test6
				; AVX2: Found an estimated cost of 7 {{.*}}.masked
				define void @test6(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
				%mask = icmp eq <2 x i32> %trigger, zeroinitializer
				call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
				ret void
				}

				; Masked load of <2 x float>; the mask is an icmp eq of <2 x i32> against
				; zero and %dst is the pass-through value.
				; The cost model is expected to report 5 for the masked intrinsic call,
				; the same value the <2 x float> masked store in test5 expects.
				; AVX2-LABEL: test7
				; AVX2: Found an estimated cost of 5 {{.*}}.masked
				define <2 x float> @test7(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
				%mask = icmp eq <2 x i32> %trigger, zeroinitializer
				%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
				ret <2 x float> %res
				}

				; Masked load of <2 x i32>; the mask is an icmp eq of <2 x i32> against
				; zero and %dst is the pass-through value.
				; The cost model is expected to report 7 for the masked intrinsic call,
				; the same value the <2 x i32> masked store in test6 expects.
				; AVX2-LABEL: test8
				; AVX2: Found an estimated cost of 7 {{.*}}.masked
				define <2 x i32> @test8(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
				%mask = icmp eq <2 x i32> %trigger, zeroinitializer
				%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
				ret <2 x i32> %res
				}


				declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
				declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
				declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
				declare void @llvm.masked.store.v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
				declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
				declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
				declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
				declare void @llvm.masked.store.v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
				declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
				declare void @llvm.masked.store.v16f32p(<16 x float>, <16 x float>*, i32, <16 x i1>)
				declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
				declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
				declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
				declare <2 x float> @llvm.masked.load.v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
				declare <8 x double> @llvm.masked.load.v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
				declare <4 x double> @llvm.masked.load.v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
				declare <2 x double> @llvm.masked.load.v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
				declare void @llvm.masked.store.v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
				declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
				declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)

test/CodeGen/X86/masked_memop.ll

	Show First 20 Lines • Show All 153 Lines • ▼ Show 20 Lines
	; AVX2: vmaskmovps			; AVX2: vmaskmovps
	define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {			define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
	%mask = icmp eq <2 x i32> %trigger, zeroinitializer			%mask = icmp eq <2 x i32> %trigger, zeroinitializer
	call void @llvm.masked.store.v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)			call void @llvm.masked.store.v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
	ret void			ret void
	}			}

	; AVX2-LABEL: test15			; AVX2-LABEL: test15
	; AVX2: vpmaskmovq			; AVX2: vpmaskmovd
	define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {			define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
	%mask = icmp eq <2 x i32> %trigger, zeroinitializer			%mask = icmp eq <2 x i32> %trigger, zeroinitializer
	call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)			call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
	ret void			ret void
	}			}

	; AVX2-LABEL: test16			; AVX2-LABEL: test16
	; AVX2: vmaskmovps			; AVX2: vmaskmovps
	; AVX2: vblendvps			; AVX2: vblendvps
	define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {			define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
	%mask = icmp eq <2 x i32> %trigger, zeroinitializer			%mask = icmp eq <2 x i32> %trigger, zeroinitializer
	%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)			%res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
	ret <2 x float> %res			ret <2 x float> %res
	}			}

	; AVX2-LABEL: test17			; AVX2-LABEL: test17
	; AVX2: vpmaskmovq			; AVX2: vpmaskmovd
	; AVX2: vblendvpd			; AVX2: vblendvps
				; AVX2: vpmovsxdq
	define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {			define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
	%mask = icmp eq <2 x i32> %trigger, zeroinitializer			%mask = icmp eq <2 x i32> %trigger, zeroinitializer
	%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)			%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
	ret <2 x i32> %res			ret <2 x i32> %res
	}			}

	; AVX2-LABEL: test18			; AVX2-LABEL: test18
	; AVX2: vmaskmovps			; AVX2: vmaskmovps
	Show All 29 Lines

This is an archive of the discontinued LLVM Phabricator instance.

Masked load/store for types that require legalization. Needs Review (Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 18149

include/llvm/Analysis/TargetTransformInfo.h

include/llvm/CodeGen/SelectionDAG.h

include/llvm/CodeGen/SelectionDAGNodes.h

lib/Analysis/TargetTransformInfo.cpp

lib/CodeGen/BasicTargetTransformInfo.cpp

lib/CodeGen/SelectionDAG/DAGCombiner.cpp

lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

lib/CodeGen/SelectionDAG/LegalizeTypes.h

lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

lib/Target/X86/X86ISelLowering.cpp

lib/Target/X86/X86TargetTransformInfo.cpp

lib/Transforms/Vectorize/LoopVectorize.cpp

test/Analysis/CostModel/X86/masked-intrinsic-cost.ll

test/CodeGen/X86/masked_memop.ll

Masked load/store for types that require legalization.
Needs Review (Public)