This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
include/llvm/IR/
-
llvm/
-
IR/
-
IntrinsicsAArch64.td
-
lib/Target/AArch64/
-
Target/
-
AArch64/
1/2
SMEInstrFormats.td
-
test/CodeGen/AArch64/
-
CodeGen/
-
AArch64/
-
sve2-intrinsics-psel.ll

Differential D150958

[SME2/SVE2p1] Change psel intrinsic such that the result/first operand are not overloaded.
ClosedPublic

Authored by sdesmalen on May 19 2023, 3:38 AM.

Download Raw Diff

Details

Reviewers

CarolineConcatto
david-arm

Commits

rG437a516da805: [SME2/SVE2p1] Change psel intrinsic such that the result/first operand are not overloaded.

Summary

All the bits of the first operand are copied to the destination register,
if the tested bit (in the second source operand) is active. This means we
copy over all vscale x 16 x i1's of the first operand. There is no need to
overload that type.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

sdesmalen created this revision. May 19 2023, 3:38 AM

Herald added a project: Restricted Project. · View Herald Transcript · May 19 2023, 3:38 AM

Herald added subscribers: hiraditya, tschuett. · View Herald Transcript

sdesmalen requested review of this revision. May 19 2023, 3:38 AM

Herald added a project: Restricted Project. · View Herald Transcript · May 19 2023, 3:38 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

Harbormaster completed remote builds in B233141: Diff 523722. May 19 2023, 4:32 AM

CarolineConcatto added inline comments. May 22 2023, 5:52 AM

llvm/lib/Target/AArch64/SMEInstrFormats.td
1326	Can you change this to be PPR8?

sdesmalen added inline comments. May 22 2023, 6:28 AM

llvm/lib/Target/AArch64/SMEInstrFormats.td
1326	The instruction itself uses PPRAny (on purpose, because it should print e.g. `p0` instead of `p0.b`). It probably won't make any difference for the pattern, because PPR8 and PPRAny use the same register class, but still I'd rather stay aligned with the instruction definition.

LGTM!

This revision is now accepted and ready to land. May 22 2023, 6:47 AM

Closed by commit rG437a516da805: [SME2/SVE2p1] Change psel intrinsic such that the result/first operand are not overloaded. (authored by sdesmalen). · Explain Why · May 22 2023, 7:13 AM

This revision was automatically updated to reflect the committed changes.

sdesmalen added a commit: rG437a516da805: [SME2/SVE2p1] Change psel intrinsic such that the result/first operand are not overloaded.

Revision Contents

Path

Size

llvm/

include/

llvm/

IR/

IntrinsicsAArch64.td

6 lines

lib/

Target/

AArch64/

SMEInstrFormats.td

16 lines

test/

CodeGen/

AArch64/

sve2-intrinsics-psel.ll

42 lines

Diff 524293

llvm/include/llvm/IR/IntrinsicsAArch64.td

Show First 20 Lines • Show All 2,786 Lines • ▼ Show 20 Lines	let TargetPrefix = "aarch64" in {

def int_aarch64_sve_revd : AdvSIMD_Merged1VectorArg_Intrinsic;		def int_aarch64_sve_revd : AdvSIMD_Merged1VectorArg_Intrinsic;

//		//
// Predicate selection		// Predicate selection
//		//

def int_aarch64_sve_psel		def int_aarch64_sve_psel
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],		: DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
[LLVMMatchType<0>,		[llvm_nxv16i1_ty,
LLVMMatchType<0>, llvm_i32_ty],		llvm_anyvector_ty, llvm_i32_ty],
[IntrNoMem]>;		[IntrNoMem]>;

//		//
// Predicate-pair intrinsics		// Predicate-pair intrinsics
//		//
foreach cmp = ["ge", "gt", "hi", "hs", "le", "lo", "ls", "lt"] in {		foreach cmp = ["ge", "gt", "hi", "hs", "le", "lo", "ls", "lt"] in {
def int_aarch64_sve_while # cmp # _x2		def int_aarch64_sve_while # cmp # _x2
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],		: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
▲ Show 20 Lines • Show All 643 Lines • Show Last 20 Lines

llvm/lib/Target/AArch64/SMEInstrFormats.td

Show First 20 Lines • Show All 1,303 Lines • ▼ Show 20 Lines	def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
PNRAny:$Pn, PPR16:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_7:$imm), 0>;		PNRAny:$Pn, PPR16:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_7:$imm), 0>;
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",		def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
(!cast<Instruction>(NAME # _S) PNRAny:$Pd,		(!cast<Instruction>(NAME # _S) PNRAny:$Pd,
PNRAny:$Pn, PPR32:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_3:$imm), 0>;		PNRAny:$Pn, PPR32:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_3:$imm), 0>;
def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",		def : InstAlias<asm # "\t$Pd, $Pn, $Pm[$Rv, $imm]",
(!cast<Instruction>(NAME # _D) PNRAny:$Pd,		(!cast<Instruction>(NAME # _D) PNRAny:$Pd,
PNRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>;		PNRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>;

def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),		def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
MatrixIndexGPR32Op12_15:$idx)),		MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;		(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),		def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
MatrixIndexGPR32Op12_15:$idx)),		MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;		(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),		def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
MatrixIndexGPR32Op12_15:$idx)),		MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;		(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),		def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
MatrixIndexGPR32Op12_15:$idx)),		MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;		(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;

let AddedComplexity = 1 in {		let AddedComplexity = 1 in {
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),		def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
		CarolineConcattoUnsubmitted Not Done Reply Inline Actions Can you change this to be PPR8? CarolineConcatto: Can you change this to be PPR8?
		sdesmalenAuthorUnsubmitted Done Reply Inline Actions The instruction itself uses PPRAny (on purpose, because it should print e.g. `p0` instead of `p0.b`). It probably won't make any difference for the pattern, because PPR8 and PPRAny use the same register class, but still I'd rather stay aligned with the instruction definition. sdesmalen: The instruction itself uses PPRAny (on purpose, because it should print e.g. `p0` instead of…
(i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),		(i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),
(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;		(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),		def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
(i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),		(i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),
(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;		(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),		def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
(i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),		(i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),
(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;		(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),		def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
(i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),		(i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),
(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;		(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
}		}
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// SME2 Instructions		// SME2 Instructions
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
▲ Show 20 Lines • Show All 3,284 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve2-intrinsics-psel.ll

	Show All 16 Lines
	; CHECK-NEXT: mov w12, w0			; CHECK-NEXT: mov w12, w0
	; CHECK-NEXT: psel p0, p0, p1.b[w12, 15]			; CHECK-NEXT: psel p0, p0, p1.b[w12, 15]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%add = add i32 %idx, 15			%add = add i32 %idx, 15
	%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %add)			%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %add)
	ret <vscale x 16 x i1> %res			ret <vscale x 16 x i1> %res
	}			}

	define <vscale x 8 x i1> @psel_h(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx) {			define <vscale x 16 x i1> @psel_h(<vscale x 16 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx) {
	; CHECK-LABEL: psel_h:			; CHECK-LABEL: psel_h:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: mov w12, w0			; CHECK-NEXT: mov w12, w0
	; CHECK-NEXT: psel p0, p0, p1.h[w12, 0]			; CHECK-NEXT: psel p0, p0, p1.h[w12, 0]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 8 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx)			%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx)
	ret <vscale x 8 x i1> %res			ret <vscale x 16 x i1> %res
	}			}

	define <vscale x 8 x i1> @psel_h_imm(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx) {			define <vscale x 16 x i1> @psel_h_imm(<vscale x 16 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx) {
	; CHECK-LABEL: psel_h_imm:			; CHECK-LABEL: psel_h_imm:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: mov w12, w0			; CHECK-NEXT: mov w12, w0
	; CHECK-NEXT: psel p0, p0, p1.h[w12, 7]			; CHECK-NEXT: psel p0, p0, p1.h[w12, 7]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%add = add i32 %idx, 7			%add = add i32 %idx, 7
	%res = call <vscale x 8 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %add)			%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> %p1, <vscale x 8 x i1> %p2, i32 %add)
	ret <vscale x 8 x i1> %res			ret <vscale x 16 x i1> %res
	}			}

	define <vscale x 4 x i1> @psel_s(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx) {			define <vscale x 16 x i1> @psel_s(<vscale x 16 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx) {
	; CHECK-LABEL: psel_s:			; CHECK-LABEL: psel_s:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: mov w12, w0			; CHECK-NEXT: mov w12, w0
	; CHECK-NEXT: psel p0, p0, p1.s[w12, 0]			; CHECK-NEXT: psel p0, p0, p1.s[w12, 0]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 4 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx)			%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx)
	ret <vscale x 4 x i1> %res			ret <vscale x 16 x i1> %res
	}			}

	define <vscale x 4 x i1> @psel_s_imm(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx) {			define <vscale x 16 x i1> @psel_s_imm(<vscale x 16 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx) {
	; CHECK-LABEL: psel_s_imm:			; CHECK-LABEL: psel_s_imm:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: mov w12, w0			; CHECK-NEXT: mov w12, w0
	; CHECK-NEXT: psel p0, p0, p1.s[w12, 3]			; CHECK-NEXT: psel p0, p0, p1.s[w12, 3]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%add = add i32 %idx, 3			%add = add i32 %idx, 3
	%res = call <vscale x 4 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %add)			%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> %p1, <vscale x 4 x i1> %p2, i32 %add)
	ret <vscale x 4 x i1> %res			ret <vscale x 16 x i1> %res
	}			}

	define <vscale x 2 x i1> @psel_d(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx) {			define <vscale x 16 x i1> @psel_d(<vscale x 16 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx) {
	; CHECK-LABEL: psel_d:			; CHECK-LABEL: psel_d:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: mov w12, w0			; CHECK-NEXT: mov w12, w0
	; CHECK-NEXT: psel p0, p0, p1.d[w12, 0]			; CHECK-NEXT: psel p0, p0, p1.d[w12, 0]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%res = call <vscale x 2 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx)			%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx)
	ret <vscale x 2 x i1> %res			ret <vscale x 16 x i1> %res
	}			}

	define <vscale x 2 x i1> @psel_d_imm(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx) {			define <vscale x 16 x i1> @psel_d_imm(<vscale x 16 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx) {
	; CHECK-LABEL: psel_d_imm:			; CHECK-LABEL: psel_d_imm:
	; CHECK: // %bb.0:			; CHECK: // %bb.0:
	; CHECK-NEXT: mov w12, w0			; CHECK-NEXT: mov w12, w0
	; CHECK-NEXT: psel p0, p0, p1.d[w12, 1]			; CHECK-NEXT: psel p0, p0, p1.d[w12, 1]
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%add = add i32 %idx, 1			%add = add i32 %idx, 1
	%res = call <vscale x 2 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %add)			%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> %p1, <vscale x 2 x i1> %p2, i32 %add)
	ret <vscale x 2 x i1> %res			ret <vscale x 16 x i1> %res
	}			}

	declare <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)			declare <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)
	declare <vscale x 8 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)			declare <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1>, <vscale x 8 x i1>, i32)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)			declare <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1>, <vscale x 4 x i1>, i32)
	declare <vscale x 2 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)			declare <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1>, <vscale x 2 x i1>, i32)