diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6790,7 +6790,8 @@ INST, hsub), 0), - ssub)))>, Requires<[NotForCodeSize, HasNEON]>; + ssub)))>, + Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext), (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>; @@ -6843,7 +6844,8 @@ INST, ssub), 0), - dsub)))>, Requires<[NotForCodeSize, HasNEON]>; + dsub)))>, + Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>; def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; diff --git a/llvm/test/CodeGen/AArch64/arm64-scvt.ll b/llvm/test/CodeGen/AArch64/arm64-scvt.ll --- a/llvm/test/CodeGen/AArch64/arm64-scvt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-scvt.ll @@ -486,13 +486,20 @@ } define float @sfct2(i16* nocapture %sp0) { -; CHECK-LABEL: sfct2: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr h0, [x0, #2] -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: scvtf s0, s0 -; CHECK-NEXT: fmul s0, s0, s0 -; CHECK-NEXT: ret +; CHECK-CYC-LABEL: sfct2: +; CHECK-CYC: // %bb.0: // %entry +; CHECK-CYC-NEXT: ldr h0, [x0, #2] +; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-CYC-NEXT: scvtf s0, s0 +; CHECK-CYC-NEXT: fmul s0, s0, s0 +; CHECK-CYC-NEXT: ret +; +; CHECK-A57-LABEL: sfct2: +; CHECK-A57: // %bb.0: // %entry +; CHECK-A57-NEXT: ldrsh w8, [x0, #2] +; CHECK-A57-NEXT: scvtf s0, w8 +; CHECK-A57-NEXT: fmul s0, s0, s0 +; CHECK-A57-NEXT: ret entry: %addr = getelementptr i16, i16* %sp0, i64 1 %pix_sp0.0.copyload = load i16, i16* %addr, align 1 @@ -558,13 +565,20 @@ } define float @sfct6(i16* nocapture %sp0, i64 %offset) { -; CHECK-LABEL: sfct6: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr h0, [x0, x1, lsl #1] -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: scvtf s0, s0 -; CHECK-NEXT: fmul s0, s0, s0 -; CHECK-NEXT: ret +; CHECK-CYC-LABEL: sfct6: +; CHECK-CYC: // %bb.0: // %entry +; CHECK-CYC-NEXT: ldr h0, [x0, x1, lsl #1] +; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-CYC-NEXT: scvtf s0, s0 +; CHECK-CYC-NEXT: fmul s0, s0, s0 +; CHECK-CYC-NEXT: ret +; +; CHECK-A57-LABEL: sfct6: +; CHECK-A57: // %bb.0: // %entry +; CHECK-A57-NEXT: ldrsh w8, [x0, x1, lsl #1] +; CHECK-A57-NEXT: scvtf s0, w8 +; CHECK-A57-NEXT: fmul s0, s0, s0 +; CHECK-A57-NEXT: ret entry: %addr = getelementptr i16, i16* %sp0, i64 %offset %pix_sp0.0.copyload = load i16, i16* %addr, align 1 @@ -645,13 +659,20 @@ } define double @sfct11(i32* nocapture %sp0) { -; CHECK-LABEL: sfct11: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr s0, [x0, #4] -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: scvtf d0, d0 -; CHECK-NEXT: fmul d0, d0, d0 -; CHECK-NEXT: ret +; CHECK-CYC-LABEL: sfct11: +; CHECK-CYC: // %bb.0: // %entry +; CHECK-CYC-NEXT: ldr s0, [x0, #4] +; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-CYC-NEXT: scvtf d0, d0 +; CHECK-CYC-NEXT: fmul d0, d0, d0 +; CHECK-CYC-NEXT: ret +; +; CHECK-A57-LABEL: sfct11: +; CHECK-A57: // %bb.0: // %entry +; CHECK-A57-NEXT: ldr w8, [x0, #4] +; CHECK-A57-NEXT: scvtf d0, w8 +; CHECK-A57-NEXT: fmul d0, d0, d0 +; CHECK-A57-NEXT: ret entry: %addr = getelementptr i32, i32* %sp0, i64 1 %pix_sp0.0.copyload = load i32, i32* %addr, align 1 @@ -716,13 +737,20 @@ } define double @sfct15(i32* nocapture %sp0, i64 %offset) { -; CHECK-LABEL: sfct15: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr s0, [x0, x1, lsl #2] -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: scvtf d0, d0 -; CHECK-NEXT: fmul d0, d0, d0 -; CHECK-NEXT: ret +; CHECK-CYC-LABEL: sfct15: +; CHECK-CYC: // %bb.0: // %entry +; CHECK-CYC-NEXT: ldr s0, [x0, x1, lsl #2] +; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-CYC-NEXT: scvtf d0, d0 +; CHECK-CYC-NEXT: fmul d0, d0, d0 +; CHECK-CYC-NEXT: ret +; +; CHECK-A57-LABEL: sfct15: +; CHECK-A57: // %bb.0: // %entry +; CHECK-A57-NEXT: ldr w8, [x0, x1, lsl #2] +; CHECK-A57-NEXT: scvtf d0, w8 +; CHECK-A57-NEXT: fmul d0, d0, d0 +; CHECK-A57-NEXT: ret entry: %addr = getelementptr i32, i32* %sp0, i64 %offset %pix_sp0.0.copyload = load i32, i32* %addr, align 1 @@ -774,13 +802,20 @@ } define float @sfct18(i16* nocapture %sp0) { -; CHECK-LABEL: sfct18: -; CHECK: // %bb.0: -; CHECK-NEXT: ldur h0, [x0, #1] -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: scvtf s0, s0 -; CHECK-NEXT: fmul s0, s0, s0 -; CHECK-NEXT: ret +; CHECK-CYC-LABEL: sfct18: +; CHECK-CYC: // %bb.0: +; CHECK-CYC-NEXT: ldur h0, [x0, #1] +; CHECK-CYC-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-CYC-NEXT: scvtf s0, s0 +; CHECK-CYC-NEXT: fmul s0, s0, s0 +; CHECK-CYC-NEXT: ret +; +; CHECK-A57-LABEL: sfct18: +; CHECK-A57: // %bb.0: +; CHECK-A57-NEXT: ldursh w8, [x0, #1] +; CHECK-A57-NEXT: scvtf s0, w8 +; CHECK-A57-NEXT: fmul s0, s0, s0 +; CHECK-A57-NEXT: ret %bitcast = ptrtoint i16* %sp0 to i64 %add = add i64 %bitcast, 1 %addr = inttoptr i64 %add to i16* @@ -868,13 +903,20 @@ } define double @sfct23(i32* nocapture %sp0) { -; CHECK-LABEL: sfct23: -; CHECK: // %bb.0: -; CHECK-NEXT: ldur s0, [x0, #1] -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: scvtf d0, d0 -; CHECK-NEXT: fmul d0, d0, d0 -; CHECK-NEXT: ret +; CHECK-CYC-LABEL: sfct23: +; CHECK-CYC: // %bb.0: +; CHECK-CYC-NEXT: ldur s0, [x0, #1] +; CHECK-CYC-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-CYC-NEXT: scvtf d0, d0 +; CHECK-CYC-NEXT: fmul d0, d0, d0 +; CHECK-CYC-NEXT: ret +; +; CHECK-A57-LABEL: sfct23: +; CHECK-A57: // %bb.0: +; CHECK-A57-NEXT: ldur w8, [x0, #1] +; CHECK-A57-NEXT: scvtf d0, w8 +; CHECK-A57-NEXT: fmul d0, d0, d0 +; CHECK-A57-NEXT: ret %bitcast = ptrtoint i32* %sp0 to i64 %add = add i64 %bitcast, 1 %addr = inttoptr i64 %add to i32* diff --git a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll --- a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll +++ b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ;; These test functions previously triggered the following error when emitting machine code: ;; LLVM ERROR: Attempting to emit UCVTFv1i64 instruction but the Feature_HasNEON predicate(s) are not met -; RUN: llc -mtriple=aarch64 -mattr=+neon,+fullfp16 < %s | FileCheck %s --check-prefixes=CHECK,NEON-ENABLED -; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 < %s | FileCheck %s --check-prefixes=CHECK,NEON-DISABLED +; RUN: llc -mtriple=aarch64 -mattr=+neon,+fullfp16,+alternate-sextload-cvt-f32-pattern < %s | FileCheck %s --check-prefixes=CHECK,NEON-ENABLED +; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16,+alternate-sextload-cvt-f32-pattern < %s | FileCheck %s --check-prefixes=CHECK,NEON-DISABLED ;; Emit an object file so that verifyPredicates is called (it is not used for ASM output). ; RUN: llc -mtriple=aarch64 -mattr=-neon,+fullfp16 -o /dev/null %s --asm-show-inst -filetype=obj @@ -206,11 +206,19 @@ } define float @si8_to_float(i8* %i, float* %f) { -; CHECK-LABEL: si8_to_float: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb w8, [x0] -; CHECK-NEXT: scvtf s0, w8 -; CHECK-NEXT: ret +; NEON-ENABLED-LABEL: si8_to_float: +; NEON-ENABLED: // %bb.0: // %entry +; NEON-ENABLED-NEXT: ldr b0, [x0] +; NEON-ENABLED-NEXT: sshll v0.8h, v0.8b, #0 +; NEON-ENABLED-NEXT: sshll v0.4s, v0.4h, #0 +; NEON-ENABLED-NEXT: scvtf s0, s0 +; NEON-ENABLED-NEXT: ret +; +; NEON-DISABLED-LABEL: si8_to_float: +; NEON-DISABLED: // %bb.0: // %entry +; NEON-DISABLED-NEXT: ldrsb w8, [x0] +; NEON-DISABLED-NEXT: scvtf s0, w8 +; NEON-DISABLED-NEXT: ret entry: %ld = load i8, i8* %i, align 1 %conv = sitofp i8 %ld to float @@ -230,11 +238,19 @@ } define double @si16_to_double(i16* %i, float* %f) { -; CHECK-LABEL: si16_to_double: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsh w8, [x0] -; CHECK-NEXT: scvtf d0, w8 -; CHECK-NEXT: ret +; NEON-ENABLED-LABEL: si16_to_double: +; NEON-ENABLED: // %bb.0: // %entry +; NEON-ENABLED-NEXT: ldr h0, [x0] +; NEON-ENABLED-NEXT: sshll v0.4s, v0.4h, #0 +; NEON-ENABLED-NEXT: sshll v0.2d, v0.2s, #0 +; NEON-ENABLED-NEXT: scvtf d0, d0 +; NEON-ENABLED-NEXT: ret +; +; NEON-DISABLED-LABEL: si16_to_double: +; NEON-DISABLED: // %bb.0: // %entry +; NEON-DISABLED-NEXT: ldrsh w8, [x0] +; NEON-DISABLED-NEXT: scvtf d0, w8 +; NEON-DISABLED-NEXT: ret entry: %ld = load i16, i16* %i, align 1 %conv = sitofp i16 %ld to double