Skip to content

Commit 451f30e

Browse files
author
Jiangning Liu
committed Jul 24, 2014
[AArch64] Disable some optimization cases for type conversion from sint to fp, because those optimizations are micro-architecture dependent and only make sense for Cyclone. A new predicate, IsCyclone, is introduced in the .td file.
llvm-svn: 213827
1 parent 933cccf commit 451f30e

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed
 

‎llvm/lib/Target/AArch64/AArch64InstrInfo.td

+4-3
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def HasCRC : Predicate<"Subtarget->hasCRC()">,
2424
AssemblerPredicate<"FeatureCRC", "crc">;
2525
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
2626
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
27+
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
2728

2829
//===----------------------------------------------------------------------===//
2930
// AArch64-specific DAG Nodes.
@@ -4386,7 +4387,7 @@ class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
43864387
0),
43874388
dsub)),
43884389
0),
4389-
ssub)))>, Requires<[NotForCodeSize]>;
4390+
ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;
43904391

43914392
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
43924393
(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
@@ -4439,8 +4440,8 @@ class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
44394440
0),
44404441
dsub)),
44414442
0),
4442-
dsub)))>, Requires<[NotForCodeSize]>;
4443-
4443+
dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;
4444+
44444445
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
44454446
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
44464447
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),

‎llvm/test/CodeGen/AArch64/arm64-scvt.ll

+26-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
1+
; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
2+
; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 | FileCheck --check-prefix=CHECK-A57 %s
23
; rdar://13082402
34

45
define float @t1(i32* nocapture %src) nounwind ssp {
@@ -409,6 +410,10 @@ define float @sfct1(i8* nocapture %sp0) {
409410
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
410411
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
411412
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
413+
; CHECK-A57-LABEL: sfct1:
414+
; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
415+
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
416+
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
412417
entry:
413418
%addr = getelementptr i8* %sp0, i64 1
414419
%pix_sp0.0.copyload = load i8* %addr, align 1
@@ -466,6 +471,10 @@ define float @sfct5(i8* nocapture %sp0, i64 %offset) {
466471
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
467472
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
468473
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
474+
; CHECK-A57-LABEL: sfct5:
475+
; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
476+
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
477+
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
469478
entry:
470479
%addr = getelementptr i8* %sp0, i64 %offset
471480
%pix_sp0.0.copyload = load i8* %addr, align 1
@@ -536,6 +545,10 @@ define double @sfct10(i16* nocapture %sp0) {
536545
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
537546
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
538547
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
548+
; CHECK-A57-LABEL: sfct10:
549+
; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, #2]
550+
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
551+
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
539552
entry:
540553
%addr = getelementptr i16* %sp0, i64 1
541554
%pix_sp0.0.copyload = load i16* %addr, align 1
@@ -592,6 +605,10 @@ define double @sfct14(i16* nocapture %sp0, i64 %offset) {
592605
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
593606
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
594607
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
608+
; CHECK-A57-LABEL: sfct14:
609+
; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
610+
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
611+
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
595612
entry:
596613
%addr = getelementptr i16* %sp0, i64 %offset
597614
%pix_sp0.0.copyload = load i16* %addr, align 1
@@ -636,6 +653,10 @@ entry:
636653
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
637654
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
638655
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
656+
; CHECK-A57-LABEL: sfct17:
657+
; CHECK-A57: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
658+
; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
659+
; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
639660
%bitcast = ptrtoint i8* %sp0 to i64
640661
%add = add i64 %bitcast, -1
641662
%addr = inttoptr i64 %add to i8*
@@ -713,6 +734,10 @@ define double @sfct22(i16* nocapture %sp0) {
713734
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
714735
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
715736
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
737+
; CHECK-A57-LABEL: sfct22:
738+
; CHECK-A57: ldursh w[[REGNUM:[0-9]+]], [x0, #1]
739+
; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
740+
; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
716741
%bitcast = ptrtoint i16* %sp0 to i64
717742
%add = add i64 %bitcast, 1
718743
%addr = inttoptr i64 %add to i16*

0 commit comments

Comments
 (0)
Please sign in to comment.