Diff 70704

include/llvm/CodeGen/SelectionDAG.h

//===-- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ---------- C++ --===//		//===-- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ---------- C++ --===//
		delenaUnsubmitted Not Done Reply Inline Actions wrong code alignment delena: wrong code alignment
//		//
// The LLVM Compiler Infrastructure		// The LLVM Compiler Infrastructure
//		//
// This file is distributed under the University of Illinois Open Source		// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.		// License. See LICENSE.TXT for details.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
▲ Show 20 Lines • Show All 959 Lines • ▼ Show 20 Lines	#endif
/// Returns sum of the base pointer and offset.		/// Returns sum of the base pointer and offset.
SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL);		SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, const SDLoc &DL);

SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,		SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
SDValue Mask, SDValue Src0, EVT MemVT,		SDValue Mask, SDValue Src0, EVT MemVT,
MachineMemOperand *MMO, ISD::LoadExtType);		MachineMemOperand *MMO, ISD::LoadExtType);
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val,		SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, SDValue Mask, EVT MemVT,		SDValue Ptr, SDValue Mask, EVT MemVT,
MachineMemOperand *MMO, bool IsTrunc);		MachineMemOperand *MMO, bool IsTrunc, bool isCompressed = false);
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,		SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO);		ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,		SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO);		ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
/// Construct a node to track a Value* through the backend.		/// Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);		SDValue getSrcValue(const Value *v);

/// Return an MDNodeSDNode which holds an MDNode.		/// Return an MDNodeSDNode which holds an MDNode.
▲ Show 20 Lines • Show All 456 Lines • Show Last 20 Lines

include/llvm/CodeGen/SelectionDAGNodes.h

//===-- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ---- C++ --===//		//===-- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ---- C++ --===//
//		//
// The LLVM Compiler Infrastructure		// The LLVM Compiler Infrastructure
//		//
// This file is distributed under the University of Illinois Open Source		// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.		// License. See LICENSE.TXT for details.
		delenaUnsubmitted Not Done Reply Inline Actions Please add comments before the API. Please explain carefully what the compression means. delena: Please add comments before the API. Please explain carefully what the compression means.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
// This file declares the SDNode class and derived classes, which are used to		// This file declares the SDNode class and derived classes, which are used to
// represent the nodes and operations present in a SelectionDAG. These nodes		// represent the nodes and operations present in a SelectionDAG. These nodes
// and operations are machine code level operations, with some similarities to		// and operations are machine code level operations, with some similarities to
// the GCC RTL representation.		// the GCC RTL representation.
//		//
▲ Show 20 Lines • Show All 440 Lines • ▼ Show 20 Lines	protected:

class StoreSDNodeBitfields {		class StoreSDNodeBitfields {
friend class StoreSDNode;		friend class StoreSDNode;
friend class MaskedStoreSDNode;		friend class MaskedStoreSDNode;

uint16_t : NumLSBaseSDNodeBits;		uint16_t : NumLSBaseSDNodeBits;

uint16_t IsTruncating : 1;		uint16_t IsTruncating : 1;
		uint16_t IsCompressed : 1;
		delenaUnsubmitted Not Done Reply Inline Actions Please change it to IsCompressing. It will be consistent with IsTruncating. delena: Please change it to IsCompressing. It will be consistent with IsTruncating.
};		};

union {		union {
SDNodeBitfields SDNodeBits;		SDNodeBitfields SDNodeBits;
ConstantSDNodeBitfields ConstantSDNodeBits;		ConstantSDNodeBitfields ConstantSDNodeBits;
MemSDNodeBitfields MemSDNodeBits;		MemSDNodeBitfields MemSDNodeBits;
LSBaseSDNodeBitfields LSBaseSDNodeBits;		LSBaseSDNodeBitfields LSBaseSDNodeBits;
LoadSDNodeBitfields LoadSDNodeBits;		LoadSDNodeBitfields LoadSDNodeBits;
▲ Show 20 Lines • Show All 1,482 Lines • ▼ Show 20 Lines
};		};

/// This class is used to represent an MSTORE node		/// This class is used to represent an MSTORE node
class MaskedStoreSDNode : public MaskedLoadStoreSDNode {		class MaskedStoreSDNode : public MaskedLoadStoreSDNode {

public:		public:
friend class SelectionDAG;		friend class SelectionDAG;
MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,		MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
bool isTrunc, EVT MemVT, MachineMemOperand *MMO)		bool isTrunc, bool isCompressed, EVT MemVT,
		MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) {		: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) {
StoreSDNodeBits.IsTruncating = isTrunc;		StoreSDNodeBits.IsTruncating = isTrunc;
		StoreSDNodeBits.IsCompressed = isCompressed;
}		}
/// Return true if the op does a truncation before store.		/// Return true if the op does a truncation before store.
/// For integers this is the same as doing a TRUNCATE and storing the result.		/// For integers this is the same as doing a TRUNCATE and storing the result.
/// For floats, it is the same as doing an FP_ROUND and storing the result.		/// For floats, it is the same as doing an FP_ROUND and storing the result.
bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }		bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }

		bool isCompressedStore() const { return StoreSDNodeBits.IsCompressed; }

const SDValue &getValue() const { return getOperand(3); }		const SDValue &getValue() const { return getOperand(3); }

static bool classof(const SDNode *N) {		static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MSTORE;		return N->getOpcode() == ISD::MSTORE;
}		}
};		};

/// This is a base class used to represent		/// This is a base class used to represent
▲ Show 20 Lines • Show All 217 Lines • Show Last 20 Lines

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,340 Lines • ▼ Show 20 Lines	SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
CSEMap.InsertNode(N, IP);		CSEMap.InsertNode(N, IP);
InsertNode(N);		InsertNode(N);
return SDValue(N, 0);		return SDValue(N, 0);
}		}

SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,		SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue Val, SDValue Ptr, SDValue Mask,		SDValue Val, SDValue Ptr, SDValue Mask,
EVT MemVT, MachineMemOperand *MMO,		EVT MemVT, MachineMemOperand *MMO,
bool isTrunc) {		bool isTrunc, bool isCompressed) {
assert(Chain.getValueType() == MVT::Other &&		assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");		"Invalid chain type");
EVT VT = Val.getValueType();		EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);		SDVTList VTs = getVTList(MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Val };		SDValue Ops[] = { Chain, Ptr, Mask, Val };
FoldingSetNodeID ID;		FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);		AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());		ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(		ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
dl.getIROrder(), VTs, isTrunc, MemVT, MMO));		dl.getIROrder(), VTs, isTrunc, isCompressed, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());		ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;		void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {		if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);		cast<MaskedStoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);		return SDValue(E, 0);
}		}
auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,		auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
isTrunc, MemVT, MMO);		isTrunc, isCompressed, MemVT, MMO);
createOperands(N, Ops);		createOperands(N, Ops);

CSEMap.InsertNode(N, IP);		CSEMap.InsertNode(N, IP);
InsertNode(N);		InsertNode(N);
return SDValue(N, 0);		return SDValue(N, 0);
}		}

SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,		SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
▲ Show 20 Lines • Show All 1,941 Lines • Show Last 20 Lines

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//		//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//		//
// The LLVM Compiler Infrastructure		// The LLVM Compiler Infrastructure
//		//
// This file is distributed under the University of Illinois Open Source		// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.		// License. See LICENSE.TXT for details.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
//		//
		delenaUnsubmitted Not Done Reply Inline Actions Code alignment delena: Code alignment
// This file defines the interfaces that X86 uses to lower LLVM code into a		// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.		// selection DAG.
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "X86ISelLowering.h"		#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"		#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"		#include "X86CallingConv.h"
▲ Show 20 Lines • Show All 18,835 Lines • ▼ Show 20 Lines	case COMPRESS_TO_MEM: {
MVT VT = DataToCompress.getSimpleValueType();		MVT VT = DataToCompress.getSimpleValueType();

MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);		MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");		assert(MemIntr && "Expected MemIntrinsicSDNode!");

if (isAllOnesConstant(Mask)) // return just a store		if (isAllOnesConstant(Mask)) // return just a store
return DAG.getStore(Chain, dl, DataToCompress, Addr,		return DAG.getStore(Chain, dl, DataToCompress, Addr,
MemIntr->getMemOperand());		MemIntr->getMemOperand());

SDValue Compressed =		// SDValue Compressed =
		igorbUnsubmitted Not Done Reply Inline Actions Chain should be changed to LoadAddress igorb: Chain should be changed to LoadAddress
		mkuperUnsubmitted Not Done Reply Inline Actions Why is this a dyn_cast? If DAG.getLoad() is guaranteed to return a LoadSDNode (and I'd assume it is), you want a cast<>. If it's not, then you need to check the result of the dyn_cast. mkuper: Why is this a dyn_cast? If DAG.getLoad() is guaranteed to return a LoadSDNode (and I'd assume…
		delenaUnsubmitted Not Done Reply Inline Actions We discussed this with Igor and came to the conclusion that this solution is not safe. The existing solution has the same problem. If any subsequent optimization breaks the store-vselect-compress sequence, it may end up with a memory exception. In all other cases we handle a masked store as a single node in order to exclude this situation. I suggest adding an "IsCompressed" flag to MaskedStoreSDNode and then using it for COMPRESS_TO_MEM. Igor implemented masked_truncstore; you can take a look. delena: We discussed this with Igor and came to the conclusion that this solution is not safe. The existing…
getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress),		// getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress),
		mkuperUnsubmitted Not Done Reply Inline Actions The formatting here looks wrong. mkuper: The formatting here looks wrong.
Mask, DAG.getUNDEF(VT), Subtarget, DAG);		// Mask, DAG.getUNDEF(VT), Subtarget, DAG);
return DAG.getStore(Chain, dl, Compressed, Addr,		// return DAG.getStore(Chain, dl, Compressed, Addr,
		delenaUnsubmitted Not Done Reply Inline Actions do not leave commented out code delena: do not leave commented out code
MemIntr->getMemOperand());		// MemIntr->getMemOperand());
		MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
		SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);

		return DAG.getMaskedStore(Chain, dl, DataToCompress, Addr, VMask, VT,
		MemIntr->getMemOperand(), false, true);
}		}
case TRUNCATE_TO_MEM_VI8:		case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:		case TRUNCATE_TO_MEM_VI16:
case TRUNCATE_TO_MEM_VI32: {		case TRUNCATE_TO_MEM_VI32: {
SDValue Mask = Op.getOperand(4);		SDValue Mask = Op.getOperand(4);
SDValue DataToTruncate = Op.getOperand(3);		SDValue DataToTruncate = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);		SDValue Addr = Op.getOperand(2);
SDValue Chain = Op.getOperand(0);		SDValue Chain = Op.getOperand(0);
▲ Show 20 Lines • Show All 13,686 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrAVX512.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 7,366 Lines • ▼ Show 20 Lines	defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
avx512vl_i32_info, HasDQI>;		avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",		defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
avx512vl_i64_info, HasDQI>, VEX_W;		avx512vl_i64_info, HasDQI>, VEX_W;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// AVX-512 - COMPRESS and EXPAND		// AVX-512 - COMPRESS and EXPAND
//		//

multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,		multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {		string OpcodeStr > {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),		defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",		(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86compress _.RC:$src1))>, AVX5128IBase;		(_.VT (X86compress _.RC:$src1))>, AVX5128IBase;

let mayStore = 1, hasSideEffects = 0 in		let mayStore = 1, hasSideEffects = 0 in
def mr : AVX5128I<opc, MRMDestMem, (outs),		def mr : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.RC:$src),		(ins _.MemOp:$dst, _.RC:$src),
OpcodeStr # "\t{$src, $dst\|$dst, $src}",		OpcodeStr # "\t{$src, $dst\|$dst, $src}",
[]>, EVEX_CD8<_.EltSize, CD8VT1>;		[]>, EVEX_CD8<_.EltSize, CD8VT1>;

def mrk : AVX5128I<opc, MRMDestMem, (outs),		def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),		(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}\|$dst {${mask}}, $src}",		OpcodeStr # "\t{$src, $dst {${mask}}\|$dst {${mask}}, $src}",
[(store (_.VT (vselect _.KRCWM:$mask,		[]>,
(_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
addr:$dst)]>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;		EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}		}

		multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {

		def : Pat<(X86mCompressedStore addr:$dst, _.KRCWM:$mask,
		(_.VT _.RC:$src)),
		(!cast<Instruction>(NAME#_.ZSuffix##mrk)
		addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
		}


multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,		multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {		AVX512VLVectorVTInfo VTInfo > {
defm Z : compress_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;		defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr>,
		compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;

let Predicates = [HasVLX] in {		let Predicates = [HasVLX] in {
defm Z256 : compress_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;		defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr>,
defm Z128 : compress_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;		compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
		defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr>,
		compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
}		}
}		}

defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>,		defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>,
EVEX;		EVEX;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>,		defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>,
EVEX, VEX_W;		EVEX, VEX_W;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>,		defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>,
EVEX;		EVEX;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>,		defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>,
EVEX, VEX_W;		EVEX, VEX_W;

// expand		// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,		multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {		string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),		defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",		(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
(_.VT (X86expand _.RC:$src1))>, AVX5128IBase;		(_.VT (X86expand _.RC:$src1))>, AVX5128IBase;

▲ Show 20 Lines • Show All 1,045 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrFragmentsSIMD.td

Show First 20 Lines • Show All 967 Lines • ▼ Show 20 Lines	def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return isa<MaskedLoadSDNode>(N);		return isa<MaskedLoadSDNode>(N);
}]>;		}]>;

// Masked store fragments.		// Masked store fragments.
// X86mstore can't be implemented in core DAG files because some targets		// X86mstore can't be implemented in core DAG files because some targets
// do not support vector types (llvm-tblgen will fail).		// do not support vector types (llvm-tblgen will fail).
def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),		def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{		(masked_store node:$src1, node:$src2, node:$src3), [{
return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();		return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
		(!cast<MaskedStoreSDNode>(N)->isCompressedStore());
}]>;		}]>;

def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),		def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86mstore node:$src1, node:$src2, node:$src3), [{		(X86mstore node:$src1, node:$src2, node:$src3), [{
if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))		if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
return Store->getAlignment() >= 16;		return Store->getAlignment() >= 16;
return false;		return false;
}]>;		}]>;
Show All 12 Lines	def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
return false;		return false;
}]>;		}]>;

def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),		def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86mstore node:$src1, node:$src2, node:$src3), [{		(X86mstore node:$src1, node:$src2, node:$src3), [{
return isa<MaskedStoreSDNode>(N);		return isa<MaskedStoreSDNode>(N);
}]>;		}]>;

		def X86mCompressedStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
		(masked_store node:$src1, node:$src2, node:$src3), [{
		return cast<MaskedStoreSDNode>(N)->isCompressedStore();
		}]>;

// masked truncstore fragments		// masked truncstore fragments
// X86mtruncstore can't be implemented in core DAG files because some targets		// X86mtruncstore can't be implemented in core DAG files because some targets
// do not support vector types (llvm-tblgen will fail).		// do not support vector types (llvm-tblgen will fail).
def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),		def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{		(masked_store node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->isTruncatingStore();		return cast<MaskedStoreSDNode>(N)->isTruncatingStore();
}]>;		}]>;

def masked_truncstorevi8 :		def masked_truncstorevi8 :
PatFrag<(ops node:$src1, node:$src2, node:$src3),		PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86mtruncstore node:$src1, node:$src2, node:$src3), [{		(X86mtruncstore node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;		return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;		}]>;
def masked_truncstorevi16 :		def masked_truncstorevi16 :
PatFrag<(ops node:$src1, node:$src2, node:$src3),		PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86mtruncstore node:$src1, node:$src2, node:$src3), [{		(X86mtruncstore node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;		return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;		}]>;
def masked_truncstorevi32 :		def masked_truncstorevi32 :
PatFrag<(ops node:$src1, node:$src2, node:$src3),		PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86mtruncstore node:$src1, node:$src2, node:$src3), [{		(X86mtruncstore node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;		return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;		}]>;

test/CodeGen/X86/avx512vl-intrinsics.ll

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 887 Lines • ▼ Show 20 Lines
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]		; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]		; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
; CHECK-NEXT: retq ## encoding: [0xc3]		; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)		%res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
ret <4 x i32> %res		ret <4 x i32> %res
}		}

		@xmm = common global <4 x i32> zeroinitializer, align 16
		@k8 = common global i8 0, align 1

		define i32 @compr11() {
		; CHECK-LABEL: compr11:
		; CHECK: ## BB#0: ## %entry
		; CHECK-NEXT: movq _xmm@{{.*}}(%rip), %rax ## encoding: [0x48,0x8b,0x05,A,A,A,A]
		; CHECK-NEXT: ## fixup A - offset: 3, value: _xmm@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
		; CHECK-NEXT: vmovdqa32 (%rax), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x00]
		; CHECK-NEXT: movq _k8@{{.*}}(%rip), %rax ## encoding: [0x48,0x8b,0x05,A,A,A,A]
		; CHECK-NEXT: ## fixup A - offset: 3, value: _k8@GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
		; CHECK-NEXT: movzbl (%rax), %eax ## encoding: [0x0f,0xb6,0x00]
		; CHECK-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
		; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
		; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
		; CHECK-NEXT: vmovdqa32 %xmm0, -{{[0-9]+}}(%rsp) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x84,0x24,0xd8,0xff,0xff,0xff]
		; CHECK-NEXT: vmovdqa32 %xmm1, -{{[0-9]+}}(%rsp) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x8c,0x24,0xe8,0xff,0xff,0xff]
		; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
		; CHECK-NEXT: retq ## encoding: [0xc3]
		entry:
		%.compoundliteral = alloca <2 x i64>, align 16
		%res = alloca <4 x i32>, align 16
		%a0 = load <4 x i32>, <4 x i32>* @xmm, align 16
		%a2 = load i8, i8* @k8, align 1
		%a21 = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %a0, <4 x i32> zeroinitializer, i8 %a2) #2
		store volatile <4 x i32> %a21, <4 x i32>* %res, align 16
		store <2 x i64> zeroinitializer, <2 x i64>* %.compoundliteral, align 16
		ret i32 0
		}


declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)		declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)

; Expand		; Expand

define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) {		define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) {
; CHECK-LABEL: expand1:		; CHECK-LABEL: expand1:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]		; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
▲ Show 20 Lines • Show All 4,387 Lines • ▼ Show 20 Lines	; CHECK-NEXT: retq ## encoding: [0xc3]
ret <8 x i32> %res4		ret <8 x i32> %res4
}		}

define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {		define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si_const:		; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]		; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]		; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI312_0-4, kind: reloc_riprel_4byte		; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI313_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]		; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI312_1-4, kind: reloc_riprel_4byte		; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI313_1-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq ## encoding: [0xc3]		; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)		%res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res		ret <8 x i32> %res
}		}

declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)		declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {		define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
Show All 14 Lines	; CHECK-NEXT: retq ## encoding: [0xc3]
ret <2 x i64> %res4		ret <2 x i64> %res4
}		}

define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {		define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:		; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
; CHECK: ## BB#0:		; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]		; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]
; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]		; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI314_0-4, kind: reloc_riprel_4byte		; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI315_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]		; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI314_1-4, kind: reloc_riprel_4byte		; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI315_1-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq ## encoding: [0xc3]		; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)		%res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res		ret <2 x i64> %res
}		}

declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)		declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {		define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
▲ Show 20 Lines • Show All 1,289 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Fix bug in masked compress
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 70704

include/llvm/CodeGen/SelectionDAG.h

include/llvm/CodeGen/SelectionDAGNodes.h

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

lib/Target/X86/X86ISelLowering.cpp

lib/Target/X86/X86InstrAVX512.td

lib/Target/X86/X86InstrFragmentsSIMD.td

test/CodeGen/X86/avx512vl-intrinsics.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Fix bug in masked compress
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 70704

include/llvm/CodeGen/SelectionDAG.h

include/llvm/CodeGen/SelectionDAGNodes.h

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

lib/Target/X86/X86ISelLowering.cpp

lib/Target/X86/X86InstrAVX512.td

lib/Target/X86/X86InstrFragmentsSIMD.td

test/CodeGen/X86/avx512vl-intrinsics.ll

[X86] Fix bug in masked compress
ClosedPublic