This is an archive of the discontinued LLVM Phabricator instance.

Disable some optimization cases for type conversion from sint to fp
ClosedPublic

Authored by Jiangning on Jul 22 2014, 9:01 PM.

Download Raw Diff

Details

Reviewers

t.p.northover

Summary

As described by the comment quoted below, some Pats intentionally generate complicated instruction sequences for the type conversions from i8 to f32 and from i16 to f64, for performance reasons.

If an integer is about to be converted to a floating point value,
just load it on the floating point unit.
These patterns are more complex because floating point loads do not
support sign extension.
The sign extension has to be explicitly added and is only supported for
one step: byte-to-half, half-to-word, word-to-doubleword.
SCVTF GPR -> FPR is 9 cycles.
SCVTF FPR -> FPR is 4 cycles.
(sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
and still be faster.
However, this is not good for code size.
// 8-bits -> float. 2 sizes step-up.

Unfortunately this may not be true for micro-architectures other than Cyclone.

This patch adds a new predicate for Cyclone in the .td file and uses it to restrict the two Pats that generate these complicated sequences to Cyclone, so that the scvtf instruction is generated directly for micro-architectures other than Cyclone.

Diff Detail

Event Timeline

Jiangning updated this revision to Diff 11800. · Jul 22 2014, 9:01 PM

Jiangning retitled this revision from to Disable some optimization cases for type conversion from sint to fp.

Jiangning updated this object.

Jiangning edited the test plan for this revision. (Show Details)

Jiangning added a reviewer: t.p.northover.

Jiangning added a subscriber: Unknown Object (MLST).

Herald added subscribers: mroth, mcrosier. · View Herald Transcript · Jul 22 2014, 9:01 PM

Hi Jiangning,

Looks completely sensible to me. Go for it!

Tim.

t.p.northover accepted this revision. · Jul 22 2014, 11:51 PM

t.p.northover edited edge metadata.

This revision is now accepted and ready to land. · Jul 22 2014, 11:51 PM

Committed in rL213827.

Revision Contents

Path

Size

lib/

Target/

AArch64/

AArch64InstrInfo.td

7 lines

test/

CodeGen/

AArch64/

arm64-scvt.ll

27 lines

Diff 11800

lib/Target/AArch64/AArch64InstrInfo.td

Context not available.
	AssemblerPredicate<"FeatureCRC", "crc">;	AssemblerPredicate<"FeatureCRC", "crc">;
	def IsLE : Predicate<"Subtarget->isLittleEndian()">;	def IsLE : Predicate<"Subtarget->isLittleEndian()">;
	def IsBE : Predicate<"!Subtarget->isLittleEndian()">;	def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
		def IsCyclone : Predicate<"Subtarget->isCyclone()">;

	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
	// AArch64-specific DAG Nodes.	// AArch64-specific DAG Nodes.
Context not available.
	0),	0),
	dsub)),	dsub)),
	0),	0),
	ssub)))>, Requires<[NotForCodeSize]>;	ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;

	def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),	def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
	(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;	(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
Context not available.
	0),	0),
	dsub)),	dsub)),
	0),	0),
	dsub)))>, Requires<[NotForCodeSize]>;	dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;

	def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),	def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
	(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;	(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
	def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),	def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
Context not available.

test/CodeGen/AArch64/arm64-scvt.ll

	; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple \| FileCheck %s	; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple \| FileCheck %s
		; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 \| FileCheck --check-prefix=CHECK-A57 %s
	; rdar://13082402	; rdar://13082402

	define float @t1(i32* nocapture %src) nounwind ssp {	define float @t1(i32* nocapture %src) nounwind ssp {
Context not available.
	; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0	; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
	; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]	; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
	; CHECK-NEXT: fmul s0, [[REG]], [[REG]]	; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
		; CHECK-A57-LABEL: sfct1:
		; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
		; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
		; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
	entry:	entry:
	%addr = getelementptr i8* %sp0, i64 1	%addr = getelementptr i8* %sp0, i64 1
	%pix_sp0.0.copyload = load i8* %addr, align 1	%pix_sp0.0.copyload = load i8* %addr, align 1
Context not available.
	; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0	; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
	; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]	; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
	; CHECK-NEXT: fmul s0, [[REG]], [[REG]]	; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
		; CHECK-A57-LABEL: sfct5:
		; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
		; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
		; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
	entry:	entry:
	%addr = getelementptr i8* %sp0, i64 %offset	%addr = getelementptr i8* %sp0, i64 %offset
	%pix_sp0.0.copyload = load i8* %addr, align 1	%pix_sp0.0.copyload = load i8* %addr, align 1
Context not available.
	; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0	; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
	; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]	; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
	; CHECK-NEXT: fmul d0, [[REG]], [[REG]]	; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
		; CHECK-A57-LABEL: sfct10:
		; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, #2]
		; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
		; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
	entry:	entry:
	%addr = getelementptr i16* %sp0, i64 1	%addr = getelementptr i16* %sp0, i64 1
	%pix_sp0.0.copyload = load i16* %addr, align 1	%pix_sp0.0.copyload = load i16* %addr, align 1
Context not available.
	; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0	; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
	; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]	; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
	; CHECK-NEXT: fmul d0, [[REG]], [[REG]]	; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
		; CHECK-A57-LABEL: sfct14:
		; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
		; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
		; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
	entry:	entry:
	%addr = getelementptr i16* %sp0, i64 %offset	%addr = getelementptr i16* %sp0, i64 %offset
	%pix_sp0.0.copyload = load i16* %addr, align 1	%pix_sp0.0.copyload = load i16* %addr, align 1
Context not available.
	; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0	; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
	; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]	; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
	; CHECK-NEXT: fmul s0, [[REG]], [[REG]]	; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
		; CHECK-A57-LABEL: sfct17:
		; CHECK-A57: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
		; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
		; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
	%bitcast = ptrtoint i8* %sp0 to i64	%bitcast = ptrtoint i8* %sp0 to i64
	%add = add i64 %bitcast, -1	%add = add i64 %bitcast, -1
	%addr = inttoptr i64 %add to i8*	%addr = inttoptr i64 %add to i8*
Context not available.
	; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0	; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
	; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]	; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
	; CHECK-NEXT: fmul d0, [[REG]], [[REG]]	; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
		; CHECK-A57-LABEL: sfct22:
		; CHECK-A57: ldursh w[[REGNUM:[0-9]+]], [x0, #1]
		; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
		; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
	%bitcast = ptrtoint i16* %sp0 to i64	%bitcast = ptrtoint i16* %sp0 to i64
	%add = add i64 %bitcast, 1	%add = add i64 %bitcast, 1
	%addr = inttoptr i64 %add to i16*	%addr = inttoptr i64 %add to i16*
Context not available.