This is an archive of the discontinued LLVM Phabricator instance.

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
1115	Thanks for taking a look. This movi variant takes an optional shift immediate value, so for the H and S cases we do indeed need to add a second immediate of 0 (the shift amount). That probably deserves a comment, so I will add that.

I think the exact suggestion was to use MOVID instead. I'm not sure how much it matters, but it may be a simpler instruction for some cores. This would then match what GCC emits.

In D99710#2663415, @dmgreen wrote:

I think the exact suggestion was to use MOVID instead. I'm not sure how much it matters, but it may be a simpler instruction for some cores. This would then match what GCC emits.

Well, that's not what GCC does at the moment, not yet at least, but it is indeed another way of doing it. But let's go for the movi d0.

Now using "movi d0".

Thanks. This LGTM, so long as the Apple folks here are happy with changing the instruction issued.

llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll
33	We may want to do the same with neon movi's, as the instructions set all bits to 0 in either case.

This revision is now accepted and ready to land. Apr 1 2021, 6:12 AM

Harbormaster completed remote builds in B96694: Diff 334661. Apr 1 2021, 6:43 AM

SjoerdMeijer mentioned this in D99586: [AArch64] Default to zero-cycle-zeroing FP registers. Apr 1 2021, 7:53 AM

Thanks, and I will wait a few days before committing.

Closed by commit rGef05b08c612d: [AArch64] Use 64-bit movi for zeroing halfs/floats (authored by SjoerdMeijer). · Explain Why · Apr 6 2021, 12:42 AM

This revision was automatically updated to reflect the committed changes.

SjoerdMeijer added a commit: rGef05b08c612d: [AArch64] Use 64-bit movi for zeroing halfs/floats.

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64AsmPrinter.cpp

21 lines

test/

CodeGen/

AArch64/

arm64-zero-cycle-zeroing.ll

20 lines

f16-imm.ll

2 lines

Diff 334626

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

Show First 20 Lines • Show All 1,086 Lines • ▼ Show 20 Lines	void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());		OutStreamer->AddComment("on-fault: " + HandlerLabel->getName());
OutStreamer->emitInstruction(MI, getSubtargetInfo());		OutStreamer->emitInstruction(MI, getSubtargetInfo());
}		}

void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {		void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) {
Register DestReg = MI.getOperand(0).getReg();		Register DestReg = MI.getOperand(0).getReg();
if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {		if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) {
// Convert H/S/D register to corresponding Q register		// Convert H/S/D register to corresponding Q register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)		MCInst MOVI;
		if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31) {
		// Create 64-bit mov using MOVIv4i16.
		MOVI.setOpcode(AArch64::MOVIv4i16);
DestReg = AArch64::Q0 + (DestReg - AArch64::H0);		DestReg = AArch64::Q0 + (DestReg - AArch64::H0);
else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31)		MOVI.addOperand(MCOperand::createReg(DestReg));
		MOVI.addOperand(MCOperand::createImm(0));
		} else if (AArch64::S0 <= DestReg && DestReg <= AArch64::S31) {
		// Create 64-bit mov using MOVIv2i32.
		MOVI.setOpcode(AArch64::MOVIv2i32);
DestReg = AArch64::Q0 + (DestReg - AArch64::S0);		DestReg = AArch64::Q0 + (DestReg - AArch64::S0);
else {		MOVI.addOperand(MCOperand::createReg(DestReg));
		MOVI.addOperand(MCOperand::createImm(0));
		} else {
assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);		assert(AArch64::D0 <= DestReg && DestReg <= AArch64::D31);
DestReg = AArch64::Q0 + (DestReg - AArch64::D0);		// Create 128-bit mov using MOVIv2d_ns.
}
MCInst MOVI;
MOVI.setOpcode(AArch64::MOVIv2d_ns);		MOVI.setOpcode(AArch64::MOVIv2d_ns);
		DestReg = AArch64::Q0 + (DestReg - AArch64::D0);
MOVI.addOperand(MCOperand::createReg(DestReg));		MOVI.addOperand(MCOperand::createReg(DestReg));
		}
MOVI.addOperand(MCOperand::createImm(0));		MOVI.addOperand(MCOperand::createImm(0));
		david-arm (Unsubmitted, Not Done) — inline comment: Should this be moved to the `else` case? It looks like we're adding the same immediate twice for the H and S cases.
		SjoerdMeijer (Author, Unsubmitted, Done) — inline comment: Thanks for taking a look. This movi variant takes an optional shift immediate value, so indeed for the H and S we need to add 0 immediates. That probably deserves a comment, so will add that.
EmitToStreamer(*OutStreamer, MOVI);		EmitToStreamer(*OutStreamer, MOVI);
} else {		} else {
MCInst FMov;		MCInst FMov;
switch (MI.getOpcode()) {		switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected opcode");		default: llvm_unreachable("Unexpected opcode");
case AArch64::FMOVH0:		case AArch64::FMOVH0:
FMov.setOpcode(AArch64::FMOVWHr);		FMov.setOpcode(AArch64::FMOVWHr);
FMov.addOperand(MCOperand::createReg(DestReg));		FMov.addOperand(MCOperand::createReg(DestReg));
▲ Show 20 Lines • Show All 389 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll

Show All 22 Lines
; NONEFP: fmov s1, wzr		; NONEFP: fmov s1, wzr
; NONEFP: fmov d2, xzr		; NONEFP: fmov d2, xzr
; NONEFP: movi{{(.16b)?}} v3{{(.2d)?}}, #0		; NONEFP: movi{{(.16b)?}} v3{{(.2d)?}}, #0
; NONE16: fmov h0, wzr		; NONE16: fmov h0, wzr
; NONE16: fmov s1, wzr		; NONE16: fmov s1, wzr
; NONE16: fmov d2, xzr		; NONE16: fmov d2, xzr
; NONE16: movi{{(.16b)?}} v3{{(.2d)?}}, #0		; NONE16: movi{{(.16b)?}} v3{{(.2d)?}}, #0
; ZEROFP-DAG: ldr h0,{{.*}}		; ZEROFP-DAG: ldr h0,{{.*}}
; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0		; ZEROFP-DAG: movi v1.2s, #0
; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0		; ZEROFP-DAG: movi v2.2d, #0
; ZEROFP-DAG: movi v{{[0-3]+}}.2d, #0		; ZEROFP-DAG: movi v3.2d, #0
		dmgreen (Unsubmitted, Not Done) — inline comment: We may want to do the same with neon movi's, as the instructions set all bits to 0 in either case.
; ZERO16: movi v{{[0-3]+}}.2d, #0		; ZERO16: movi v0.4h, #0
; ZERO16: movi v{{[0-3]+}}.2d, #0		; ZERO16: movi v1.2s, #0
; ZERO16: movi v{{[0-3]+}}.2d, #0		; ZERO16: movi v2.2d, #0
; ZERO16: movi v{{[0-3]+}}.2d, #0		; ZERO16: movi v3.2d, #0
tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind		tail call void @bar(half 0.000000e+00, float 0.000000e+00, double 0.000000e+00, <2 x double> <double 0.000000e+00, double 0.000000e+00>) nounwind
ret void		ret void
}		}

define void @t2() nounwind ssp {		define void @t2() nounwind ssp {
entry:		entry:
; ALL-LABEL: t2:		; ALL-LABEL: t2:
; NONEGP: mov w0, wzr		; NONEGP: mov w0, wzr
Show All 14 Lines	; ZEROGP: mov x1, #0
tail call void @barl(i64 0, i64 0) nounwind		tail call void @barl(i64 0, i64 0) nounwind
ret void		ret void
}		}

define void @t4() nounwind ssp {		define void @t4() nounwind ssp {
; ALL-LABEL: t4:		; ALL-LABEL: t4:
; NONEFP: fmov s{{[0-3]+}}, wzr		; NONEFP: fmov s{{[0-3]+}}, wzr
; NONEFP: fmov s{{[0-3]+}}, wzr		; NONEFP: fmov s{{[0-3]+}}, wzr
; ZEROFP: movi v{{[0-3]+}}.2d, #0		; ZEROFP: movi v0.2s, #0
; ZEROFP: movi v{{[0-3]+}}.2d, #0		; ZEROFP: movi v1.2s, #0
tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind		tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind
ret void		ret void
}		}

declare double @sin(double)		declare double @sin(double)

; We used to produce spills+reloads for a Q register with zero cycle zeroing		; We used to produce spills+reloads for a Q register with zero cycle zeroing
; enabled.		; enabled.
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines
; ZEROGP: mov x0, #0		; ZEROGP: mov x0, #0
ret i64 0		ret i64 0
}		}

define float @tf32() {		define float @tf32() {
entry:		entry:
; ALL-LABEL: tf32:		; ALL-LABEL: tf32:
; NONEFP: mov s0, wzr		; NONEFP: mov s0, wzr
; ZEROFP: movi v0.2d, #0		; ZEROFP: movi v0.2s, #0
ret float 0.0		ret float 0.0
}		}

define double @td64() {		define double @td64() {
entry:		entry:
; ALL-LABEL: td64:		; ALL-LABEL: td64:
; NONEFP: mov d0, xzr		; NONEFP: mov d0, xzr
; ZEROFP: movi v0.2d, #0		; ZEROFP: movi v0.2d, #0
▲ Show 20 Lines • Show All 73 Lines • Show Last 20 Lines

llvm/test/CodeGen/AArch64/f16-imm.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 \| FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-NOZCZ			; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16 \| FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-NOZCZ
	; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz \| FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-ZCZ			; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+fullfp16,+zcz \| FileCheck %s --check-prefixes=CHECK,CHECK-FP16,CHECK-ZCZ
	; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 \| FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16			; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=-fullfp16 \| FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16

	define half @Const0() {			define half @Const0() {
	; CHECK-NOZCZ-LABEL: Const0:			; CHECK-NOZCZ-LABEL: Const0:
	; CHECK-NOZCZ: // %bb.0: // %entry			; CHECK-NOZCZ: // %bb.0: // %entry
	; CHECK-NOZCZ-NEXT: fmov h0, wzr			; CHECK-NOZCZ-NEXT: fmov h0, wzr
	; CHECK-NOZCZ-NEXT: ret			; CHECK-NOZCZ-NEXT: ret
	;			;
	; CHECK-ZCZ-LABEL: Const0:			; CHECK-ZCZ-LABEL: Const0:
	; CHECK-ZCZ: // %bb.0: // %entry			; CHECK-ZCZ: // %bb.0: // %entry
	; CHECK-ZCZ-NEXT: movi v0.2d, #0000000000000000			; CHECK-ZCZ-NEXT: movi v0.4h, #0
	; CHECK-ZCZ-NEXT: ret			; CHECK-ZCZ-NEXT: ret
	;			;
	; CHECK-NOFP16-LABEL: Const0:			; CHECK-NOFP16-LABEL: Const0:
	; CHECK-NOFP16: // %bb.0: // %entry			; CHECK-NOFP16: // %bb.0: // %entry
	; CHECK-NOFP16-NEXT: adrp x8, .LCPI0_0			; CHECK-NOFP16-NEXT: adrp x8, .LCPI0_0
	; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI0_0]			; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI0_0]
	; CHECK-NOFP16-NEXT: ret			; CHECK-NOFP16-NEXT: ret
	entry:			entry:
	▲ Show 20 Lines • Show All 93 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Use 64-bit movi for zeroing halfs/floats (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 334626

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll

llvm/test/CodeGen/AArch64/f16-imm.ll

[AArch64] Use 64-bit movi for zeroing halfs/floats
Closed, Public