diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -394,7 +394,8 @@
 
   bool useSVEForFixedLengthVectors() const {
+    // With NEON unavailable, use SVE only if the subtarget actually has it.
     if (!isNeonAvailable())
-      return true;
+      return hasSVE();
 
     // Prefer NEON unless larger SVE registers are available.
     return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
diff --git a/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s
+
+; Cost of a fixed-length vector uitofp with NEON disabled and no SVE feature requested.
+
+target triple = "aarch64-unknown-linux-gnu"
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @uitofp() #0 {
+; CHECK-LABEL: 'uitofp'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %conv = uitofp <16 x i64> undef to <16 x float>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+  %conv = uitofp <16 x i64> undef to <16 x float>
+  ret void
+}
+
+attributes #0 = { "target-features"="-neon" }
diff --git a/llvm/test/CodeGen/AArch64/no-sve-no-neon.ll b/llvm/test/CodeGen/AArch64/no-sve-no-neon.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/no-sve-no-neon.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=-neon < %s | FileCheck %s
+
+; Codegen of a fixed-length vector uitofp with NEON disabled and no SVE feature
+; requested; the expected output below is a scalar ucvtf/str sequence.
+
+target triple = "aarch64-unknown-linux-gnu-elf"
+
+define <16 x float> @foo(<16 x i64> %a) {
+; CHECK-LABEL: foo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldp x10, x9, [sp, #48]
+; CHECK-NEXT:    ldp x12, x11, [sp, #32]
+; CHECK-NEXT:    ucvtf s1, x10
+; CHECK-NEXT:    ucvtf s0, x9
+; CHECK-NEXT:    ldp x13, x9, [sp, #16]
+; CHECK-NEXT:    ucvtf s2, x11
+; CHECK-NEXT:    ucvtf s3, x12
+; CHECK-NEXT:    ldp x11, x10, [sp]
+; CHECK-NEXT:    str s0, [x8, #60]
+; CHECK-NEXT:    ucvtf s0, x13
+; CHECK-NEXT:    str s1, [x8, #56]
+; CHECK-NEXT:    ucvtf s4, x9
+; CHECK-NEXT:    str s2, [x8, #52]
+; CHECK-NEXT:    ucvtf s2, x11
+; CHECK-NEXT:    str s3, [x8, #48]
+; CHECK-NEXT:    ucvtf s1, x10
+; CHECK-NEXT:    ucvtf s3, x7
+; CHECK-NEXT:    str s0, [x8, #40]
+; CHECK-NEXT:    ucvtf s0, x5
+; CHECK-NEXT:    str s4, [x8, #44]
+; CHECK-NEXT:    ucvtf s4, x6
+; CHECK-NEXT:    str s2, [x8, #32]
+; CHECK-NEXT:    ucvtf s2, x3
+; CHECK-NEXT:    str s1, [x8, #36]
+; CHECK-NEXT:    ucvtf s1, x4
+; CHECK-NEXT:    str s3, [x8, #28]
+; CHECK-NEXT:    ucvtf s3, x2
+; CHECK-NEXT:    str s4, [x8, #24]
+; CHECK-NEXT:    ucvtf s4, x1
+; CHECK-NEXT:    str s0, [x8, #20]
+; CHECK-NEXT:    ucvtf s0, x0
+; CHECK-NEXT:    str s1, [x8, #16]
+; CHECK-NEXT:    str s2, [x8, #12]
+; CHECK-NEXT:    str s3, [x8, #8]
+; CHECK-NEXT:    str s4, [x8, #4]
+; CHECK-NEXT:    str s0, [x8]
+; CHECK-NEXT:    ret
+  %conv1 = uitofp <16 x i64> %a to <16 x float>
+  ret <16 x float> %conv1
+}