This is an archive of the discontinued LLVM Phabricator instance.

[AArch64][SVE] Improve codegen when extracting first lane of active lane mask
ClosedPublic

Authored by RosieSumpter on May 9 2022, 2:56 AM.

Download Raw Diff

Details

Reviewers

david-arm
sdesmalen
kmclaughlin
CarolineConcatto
Allen
efriedma

Commits

rG1a2665902f12: [AArch64][SVE] Improve codegen when extracting first lane of active lane mask

Summary

When extracting the first lane of a predicate created using the
llvm.get.active.lane.mask intrinsic, it should give the same codegen as
when the predicate is created using the llvm.aarch64.sve.whilelo
intrinsic, since get.active.lane.mask is lowered to whilelo. This patch
ensures the codegen is the same by recognizing
llvm.get.active.lane.mask as a flag-setting operation in this case.

Diff Detail

Event Timeline

RosieSumpter created this revision. · May 9 2022, 2:56 AM

Herald added a reviewer: efriedma. · View Herald Transcript · May 9 2022, 2:56 AM

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: ctetreau, psnobl, hiraditya and 2 others. · View Herald Transcript

RosieSumpter requested review of this revision. · May 9 2022, 2:56 AM

Herald added a project: Restricted Project. · View Herald Transcript · May 9 2022, 2:56 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

Harbormaster completed remote builds in B163446: Diff 428018. · May 9 2022, 3:27 AM

LGTM! Thanks for the codegen improvement @RosieSumpter. :)

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Line 14654 — nit: Could you add a short comment here explaining that get_active_lane_mask is lowered to a whilelo instruction?

This revision is now accepted and ready to land. · May 9 2022, 3:56 AM

Allen accepted this revision. · May 9 2022, 5:36 AM

This revision was landed with ongoing or failed builds. · May 9 2022, 6:02 AM

Closed by commit rG1a2665902f12: [AArch64][SVE] Improve codegen when extracting first lane of active lane mask (authored by RosieSumpter). · Explain Why

This revision was automatically updated to reflect the committed changes.

RosieSumpter added a commit: rG1a2665902f12: [AArch64][SVE] Improve codegen when extracting first lane of active lane mask.

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64ISelLowering.cpp

3 lines

test/

CodeGen/

AArch64/

sve-cmp-folds.ll

12 lines

Diff 428018

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 14,644 Lines • ▼ Show 20 Lines	if ((N.getOpcode() == ISD::SETCC) ||
(N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&		(N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
(N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||		(N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||		N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||		N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||		N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||		N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||		N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||		N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt)))		N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
		N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
		[Inline comment — david-arm, Unsubmitted, Not Done] nit: Could you add a short comment here explaining that get_active_lane_mask is lowered to a whilelo instruction?
return true;		return true;

return false;		return false;
}		}

// Materialize : i1 = extract_vector_elt t37, Constant:i64<0>		// Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
// ... into: "ptrue p, all" + PTEST		// ... into: "ptrue p, all" + PTEST
static SDValue		static SDValue
▲ Show 20 Lines • Show All 6,500 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/sve-cmp-folds.ll

	Show First 20 Lines • Show All 164 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: whilelt p0.s, x0, x1			; CHECK-NEXT: whilelt p0.s, x0, x1
	; CHECK-NEXT: cset w0, mi			; CHECK-NEXT: cset w0, mi
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	%predicate = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 %next, i64 %end)			%predicate = call <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64 %next, i64 %end)
	%bit = extractelement <vscale x 4 x i1> %predicate, i64 0			%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
	ret i1 %bit			ret i1 %bit
	}			}

				define i1 @lane_mask_first(i64 %next, i64 %end) {
				; CHECK-LABEL: lane_mask_first:
				; CHECK: // %bb.0:
				; CHECK-NEXT: whilelo p0.s, x0, x1
				; CHECK-NEXT: cset w0, mi
				; CHECK-NEXT: ret
				%predicate = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %next, i64 %end)
				%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
				ret i1 %bit
				}

	declare i64 @llvm.vscale.i64()			declare i64 @llvm.vscale.i64()
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilehi.nxv4i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilehs.nxv4i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilele.nxv4i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64)
	declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64)			declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64)
				declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)