The AArch64 backend has had some patterns that optimize code of the form:
  ldrsh w8, [x0]
  scvtf s0, w8
to:
  ldr   h0, [x0]
  sshll v0.4s, v0.4h, #0
  scvtf s0, s0
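For reference, a minimal C snippet that typically lowers to the first sequence on AArch64 (the function name load_short_to_float is just for illustration):

  #include <stdint.h>

  /* Sign-extending halfword load followed by an int-to-float convert.
     Without the alternate pattern this typically compiles to
     ldrsh + scvtf (a GPR -> FPR move); with it, to the
     ldr/sshll/scvtf sequence shown above. */
  float load_short_to_float(const int16_t *p) {
      return (float)*p;
  }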
The idea is to remove the GPR->FPR move, but in reality it makes the code larger and slower (or at best the same) on all the CPUs I tried.
This patch adds the UseAlternateSExtLoadCVTF32 predicate to these patterns, similar to the nearby related pattern.