The AMX combiner currently stores the undef or zero value to the stack and then invokes tileload to load the data into a tile register. To avoid this store/load round trip, we can materialize the undef or zero value directly with tilezero.
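As a reader-oriented illustration (not code from the patch itself), here is a minimal sketch of the rewrite, assuming the tileloadd64 call's source buffer is known to hold only zeros; the helper name is hypothetical, and the shape operands of the load are simply reused for the tilezero call:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"

using namespace llvm;

// Hypothetical sketch: replace a tileloadd64 of a zero-filled stack slot
// with a tilezero of the same shape.
//
//   Before:  store <256 x i32> zeroinitializer, <256 x i32>* %slot
//            %t = call x86_amx @llvm.x86.tileloadd64.internal(
//                     i16 %row, i16 %col, i8* %ptr, i64 64)
//   After:   %t = call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col)
static Value *replaceZeroLoadWithTileZero(IntrinsicInst *TileLoad) {
  IRBuilder<> Builder(TileLoad);
  // tileloadd64.internal(row, col, ptr, stride): reuse the shape operands.
  Value *Row = TileLoad->getArgOperand(0);
  Value *Col = TileLoad->getArgOperand(1);
  Function *TileZero = Intrinsic::getDeclaration(
      TileLoad->getModule(), Intrinsic::x86_tilezero_internal);
  Value *Zero = Builder.CreateCall(TileZero, {Row, Col});
  TileLoad->replaceAllUsesWith(Zero);
  TileLoad->eraseFromParent();
  return Zero;
}
```

The now-dead store and stack slot can then be cleaned up by existing dead-store and stack-slot elimination.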
Diff Detail
- Repository: rG LLVM Github Monorepo
Event Timeline
| llvm/lib/Target/X86/X86LowerAMXType.cpp | |
|---|---|
| 81 | Can we implement this with "reverse thinking"? |
| 186 | Here we only fetch the first use. |
| llvm/lib/Target/X86/X86LowerAMXType.cpp | |
|---|---|
| 81 | Most of the AMX intrinsics return x86_amx; an exception is tilestored64_internal, which returns void. Maybe we could add an llvm.x86.amx name prefix to each AMX intrinsic, so that we can distinguish AMX intrinsics by name? |
| 186 | We don't traverse all nodes; that would be more complex. Here we just traverse the first user. If we can find the shape, we return it; otherwise we return nullptr and abandon the optimization. |
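A hedged sketch of the first-user shape lookup described in the comment above (the helper name and the single intrinsic case handled here are illustrative, not the patch's actual code):

```cpp
#include <utility>

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"

using namespace llvm;

// Illustrative helper: look only at the first user of an x86_amx value and
// try to read its (row, col) shape from that user's operands; give up with
// {nullptr, nullptr} otherwise.
static std::pair<Value *, Value *> getShapeFromFirstUser(Value *AMXVal) {
  if (AMXVal->use_empty())
    return {nullptr, nullptr};
  auto *II = dyn_cast<IntrinsicInst>(*AMXVal->user_begin());
  if (!II)
    return {nullptr, nullptr};
  // tilestored64.internal(row, col, ptr, stride, tile): the stored tile's
  // shape is given by the first two operands.
  if (II->getIntrinsicID() == Intrinsic::x86_tilestored64_internal)
    return {II->getArgOperand(0), II->getArgOperand(1)};
  // Other AMX users would need their own operand mapping; bail out here.
  return {nullptr, nullptr};
}
```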
| llvm/lib/Target/X86/X86LowerAMXType.cpp | |
|---|---|
| 81 | Re-adding the prefix is OK, but it is a big job. |
| 186 | I think that is no problem. |
| llvm/lib/Target/X86/X86LowerAMXType.cpp | |
|---|---|
| 81 | Good idea. Updated the patch. |
Can we implement this with "reverse thinking"?
For example: use the x86_amx tile type and exclude the non-AMX-intrinsic instructions (cast, copy, ...).
That way we may not need to touch this code when new AMX intrinsics are added.
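A minimal sketch of that suggestion, assuming AMX values are identified purely by the x86_amx IR type (both helper names are hypothetical):

```cpp
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical predicates for the "reverse thinking" approach: classify by
// the x86_amx type rather than by enumerating every AMX intrinsic.
static bool isAMXValue(const Value *V) {
  return V->getType()->isX86_AMXTy();
}

static bool isNonIntrinsicAMXUse(const Instruction *I) {
  // Casts/copies of AMX values are the only non-intrinsic producers we
  // expect; everything else yielding x86_amx should be an AMX intrinsic.
  return isAMXValue(I) && !isa<IntrinsicInst>(I);
}
```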