Diff 446493

clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c

	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -mvscale-min=2 -mvscale-max=2 \| FileCheck %s --check-prefixes=CHECK,CHECK256			// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S \
	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -mvscale-min=4 -mvscale-max=4 \| FileCheck %s --check-prefixes=CHECK,CHECK512			// RUN: -mllvm -prefer-predicate-over-epilogue=scalar-epilogue -o - %s -mvscale-min=2 -mvscale-max=2 \| FileCheck %s --check-prefixes=CHECK,CHECK256
	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -mvscale-min=8 -mvscale-max=8 \| FileCheck %s --check-prefixes=CHECK,CHECK1024			// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S \
	// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -mvscale-min=16 -mvscale-max=16 \| FileCheck %s --check-prefixes=CHECK,CHECK2048			// RUN: -mllvm -prefer-predicate-over-epilogue=scalar-epilogue -o - %s -mvscale-min=4 -mvscale-max=4 \| FileCheck %s --check-prefixes=CHECK,CHECK512
				// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S \
				// RUN: -mllvm -prefer-predicate-over-epilogue=scalar-epilogue -o - %s -mvscale-min=8 -mvscale-max=8 \| FileCheck %s --check-prefixes=CHECK,CHECK1024
				// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S \
				// RUN: -mllvm -prefer-predicate-over-epilogue=scalar-epilogue -o - %s -mvscale-min=16 -mvscale-max=16 \| FileCheck %s --check-prefixes=CHECK,CHECK2048

	// REQUIRES: aarch64-registered-target			// REQUIRES: aarch64-registered-target

	#include <arm_sve.h>			#include <arm_sve.h>

	void func(int *restrict a, int *restrict b) {			void func(int *restrict a, int *restrict b) {
	// CHECK-LABEL: func			// CHECK-LABEL: func
	// CHECK256-COUNT-8: st1w			// CHECK256-COUNT-8: st1w
	// CHECK512-COUNT-4: st1w			// CHECK512-COUNT-4: st1w
	// CHECK1024-COUNT-2: st1w			// CHECK1024-COUNT-2: st1w
	// CHECK2048-COUNT-1: st1w			// CHECK2048-COUNT-1: st1w
	#pragma clang loop vectorize(enable)			#pragma clang loop vectorize(enable)
	for (int i = 0; i < 64; ++i)			for (int i = 0; i < 64; ++i)
	a[i] += b[i];			a[i] += b[i];
	}			}

llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt < %s -loop-vectorize -mtriple=aarch64--linux-gnu -mattr=+neon -force-vector-width=2 -force-vector-interleave=1 -S -o - \| FileCheck %s			; RUN: opt < %s -loop-vectorize -mtriple=aarch64--linux-gnu -mattr=+neon -force-vector-width=2 -force-vector-interleave=1 \
	; RUN: opt < %s -loop-vectorize -mtriple=aarch64--linux-gnu -mattr=+sve -force-vector-width=2 -force-vector-interleave=1 -scalable-vectorization=on -S -o - \| FileCheck --check-prefix=SVE %s			; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S -o - \| FileCheck %s
				; RUN: opt < %s -loop-vectorize -mtriple=aarch64--linux-gnu -mattr=+sve -force-vector-width=2 -force-vector-interleave=1 \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -scalable-vectorization=on -S -o - \| FileCheck --check-prefix=SVE %s

	define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocapture noundef readonly %offset, i32 noundef %size) local_unnamed_addr {			define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocapture noundef readonly %offset, i32 noundef %size) local_unnamed_addr {
	; CHECK-LABEL: @test(			; CHECK-LABEL: @test(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0			; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0
	; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]			; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
	; CHECK: for.body.preheader:			; CHECK: for.body.preheader:
	; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64			; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SIZE]] to i64
	▲ Show 20 Lines • Show All 90 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll

	; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 < %s \| FileCheck %s			; RUN: opt -loop-vectorize -debug-only=loop-vectorize -disable-output -prefer-predicate-over-epilogue=scalar-epilogue 2>&1 < %s \| FileCheck %s
	; REQUIRES: asserts			; REQUIRES: asserts

	target triple = "aarch64"			target triple = "aarch64"

	; Test that shows how many registers the loop vectorizer thinks an illegal <VF x i1> will consume.			; Test that shows how many registers the loop vectorizer thinks an illegal <VF x i1> will consume.

	; CHECK-LABEL: LV: Checking a loop in 'or_reduction_neon' from <stdin>			; CHECK-LABEL: LV: Checking a loop in 'or_reduction_neon' from <stdin>
	; CHECK: LV(REG): VF = 32			; CHECK: LV(REG): VF = 32
	▲ Show 20 Lines • Show All 48 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

	; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu \			; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu \
	; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t \| FileCheck %s			; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -pass-remarks-missed=loop-vectorize < %s 2>%t \| FileCheck %s
	; RUN: cat %t \| FileCheck %s --check-prefix=CHECK-REMARKS			; RUN: cat %t \| FileCheck %s --check-prefix=CHECK-REMARKS
	; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-target-instruction-cost=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu \			; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-target-instruction-cost=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu \
	; RUN: -pass-remarks-missed=loop-vectorize < %s 2>%t \| FileCheck %s			; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -pass-remarks-missed=loop-vectorize < %s 2>%t \| FileCheck %s
	; RUN: cat %t \| FileCheck %s --check-prefix=CHECK-REMARKS			; RUN: cat %t \| FileCheck %s --check-prefix=CHECK-REMARKS

	define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {			define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
	; CHECK-LABEL: @vec_load			; CHECK-LABEL: @vec_load
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*			; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
	; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> %[[LOAD]])			; CHECK: call <vscale x 2 x double> @foo_vec(<vscale x 2 x double> %[[LOAD]])
	entry:			entry:
	▲ Show 20 Lines • Show All 237 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S \| FileCheck %s			; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=scalar-epilogue -mtriple aarch64-unknown-linux-gnu \
				; RUN: -mattr=+sve -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S \| FileCheck %s

	define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias nocapture readonly %cond, i64 %N){			define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias nocapture readonly %cond, i64 %N){
	; CHECK-LABEL: @cond_fadd(			; CHECK-LABEL: @cond_fadd(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4			; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
	; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]			; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
	; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]			; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	▲ Show 20 Lines • Show All 176 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll

	; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S 2>%t \| FileCheck %s -check-prefix=CHECK			; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=scalar-epilogue -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize \
				; RUN: -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S 2>%t \| FileCheck %s -check-prefix=CHECK
	; RUN: cat %t \| FileCheck %s -check-prefix=CHECK-REMARK			; RUN: cat %t \| FileCheck %s -check-prefix=CHECK-REMARK

	; Reduction can be vectorized			; Reduction can be vectorized

	; ADD			; ADD

	; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)			; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
	define i32 @add(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {			define i32 @add(i32* nocapture %a, i32* nocapture readonly %b, i64 %n) {
	▲ Show 20 Lines • Show All 424 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

	; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=false -S \| FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED			; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
	; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=false -hints-allow-reordering=true -S \| FileCheck %s --check-prefix=CHECK-UNORDERED			; RUN: -force-ordered-reductions=false -hints-allow-reordering=false -S \| FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
	; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=false -S \| FileCheck %s --check-prefix=CHECK-ORDERED			; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
	; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S \| FileCheck %s --check-prefix=CHECK-UNORDERED			; RUN: -force-ordered-reductions=false -hints-allow-reordering=true -S \| FileCheck %s --check-prefix=CHECK-UNORDERED
	; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S \| FileCheck %s --check-prefix=CHECK-ORDERED			; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
	; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false \			; RUN: -force-ordered-reductions=true -hints-allow-reordering=false -S \| FileCheck %s --check-prefix=CHECK-ORDERED
	; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S \| FileCheck %s --check-prefix=CHECK-ORDERED-TF			; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
				; RUN: -force-ordered-reductions=true -hints-allow-reordering=true -S \| FileCheck %s --check-prefix=CHECK-UNORDERED
				; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue \
				; RUN: -hints-allow-reordering=false -S \| FileCheck %s --check-prefix=CHECK-ORDERED
				; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
				; RUN: -hints-allow-reordering=false -S \| FileCheck %s --check-prefix=CHECK-ORDERED-TF

	define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {			define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
	; CHECK-ORDERED-LABEL: @fadd_strict			; CHECK-ORDERED-LABEL: @fadd_strict
	; CHECK-ORDERED: vector.body:			; CHECK-ORDERED: vector.body:
	; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]			; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
	; CHECK-ORDERED: %[[LOAD:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*			; CHECK-ORDERED: %[[LOAD:.*]] = load <vscale x 8 x float>, <vscale x 8 x float>*
	; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv8f32(float %[[VEC_PHI]], <vscale x 8 x float> %[[LOAD]])			; CHECK-ORDERED: %[[RDX]] = call float @llvm.vector.reduce.fadd.nxv8f32(float %[[VEC_PHI]], <vscale x 8 x float> %[[LOAD]])
	; CHECK-ORDERED: for.end			; CHECK-ORDERED: for.end
	▲ Show 20 Lines • Show All 716 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll

	; RUN: opt -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S -o - < %s \| FileCheck %s			; RUN: opt -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 \
	; RUN: opt -mattr=+sve -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S -o - < %s \| FileCheck %s			; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S -o - < %s \| FileCheck %s
				; RUN: opt -mattr=+sve -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S -o - < %s \| FileCheck %s

	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	; This test is defending against a bug that appeared when we have a target			; This test is defending against a bug that appeared when we have a target
	; configuration where masked loads/stores are legal -- e.g. AArch64 with SVE.			; configuration where masked loads/stores are legal -- e.g. AArch64 with SVE.
	; Predication would not be applied during interleaving, enabling the			; Predication would not be applied during interleaving, enabling the
	; possibility of superfluous loads/stores which could result in miscompiles.			; possibility of superfluous loads/stores which could result in miscompiles.
	; This test checks that, when we disable vectorisation and force interleaving,			; This test checks that, when we disable vectorisation and force interleaving,
	▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll

	; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve < %s -S \| FileCheck %s			; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s -S \| FileCheck %s


	target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"			target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	define void @cmpsel_i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) {			define void @cmpsel_i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) {
	; CHECK-LABEL: @cmpsel_i32(			; CHECK-LABEL: @cmpsel_i32(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	▲ Show 20 Lines • Show All 88 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - \| FileCheck %s			; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -o - \| FileCheck %s

	define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {			define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
	; CHECK-LABEL: @cond_inv_load_i32i32i16(			; CHECK-LABEL: @cond_inv_load_i32i32i16(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2			; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
	; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP1]], [[N:%.*]]			; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP1]], [[N:%.*]]
	; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]			; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	▲ Show 20 Lines • Show All 253 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; REQUIRES: asserts			; REQUIRES: asserts
	; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 --debug-only=loop-vectorize -force-target-instruction-cost=1 -S 2>%t \| FileCheck %s --check-prefix=CHECK			; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 -prefer-predicate-over-epilogue=scalar-epilogue \
				; RUN: -debug-only=loop-vectorize -force-target-instruction-cost=1 -S 2>%t \| FileCheck %s --check-prefix=CHECK
	; RUN: cat %t \| FileCheck %s --check-prefix=DEBUG			; RUN: cat %t \| FileCheck %s --check-prefix=DEBUG
	; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 --debug-only=loop-vectorize -S 2>%t \| FileCheck %s --check-prefix=CHECK-VF8			; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 \
				; RUN: -debug-only=loop-vectorize -S 2>%t \| FileCheck %s --check-prefix=CHECK-VF8
	; RUN: cat %t \| FileCheck %s --check-prefix=DEBUG-FORCED			; RUN: cat %t \| FileCheck %s --check-prefix=DEBUG-FORCED

	target triple = "aarch64-linux-gnu"			target triple = "aarch64-linux-gnu"

	; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_16'			; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_16'
	; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)			; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
	; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1			; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1

	▲ Show 20 Lines • Show All 356 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -loop-vectorize -dce -mtriple aarch64-linux-gnu -mattr=+sve < %s -S \| FileCheck %s			; RUN: opt -loop-vectorize -dce -mtriple aarch64-linux-gnu -mattr=+sve \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s -S \| FileCheck %s

	target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"			target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	; This should be vscale x 8 vectorized, maybe with some interleaving.			; This should be vscale x 8 vectorized, maybe with some interleaving.

	define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef readonly %s, i32 noundef %n) {			define void @fneg(ptr nocapture noundef writeonly %d, ptr nocapture noundef readonly %s, i32 noundef %n) {
	; CHECK-LABEL: @fneg(			; CHECK-LABEL: @fneg(
	▲ Show 20 Lines • Show All 91 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll

	; REQUIRES: asserts			; REQUIRES: asserts
	; RUN: opt -loop-vectorize -mcpu=neoverse-v1 -disable-output %s -debug 2>&1 \| FileCheck %s			; RUN: opt -loop-vectorize -mcpu=neoverse-v1 -disable-output %s -debug \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue 2>&1 \| FileCheck %s

	target triple="aarch64--linux-gnu"			target triple="aarch64--linux-gnu"

	; CHECK: LV: Checking a loop in 'gather_nxv4i32_loaded_index'			; CHECK: LV: Checking a loop in 'gather_nxv4i32_loaded_index'
	; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, float* %arrayidx3, align 4			; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, float* %arrayidx3, align 4
	define void @gather_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {			define void @gather_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
	entry:			entry:
	br label %for.body			br label %for.body
	▲ Show 20 Lines • Show All 179 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -force-target-instruction-cost=1 -o - \| FileCheck %s			; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -force-target-instruction-cost=1 -o - \| FileCheck %s

	define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {			define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
	; CHECK-LABEL: @gather_nxv4i32_ind64(			; CHECK-LABEL: @gather_nxv4i32_ind64(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()			; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
	; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2			; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
	; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP1]], [[N:%.*]]			; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP1]], [[N:%.*]]
	; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]			; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
	▲ Show 20 Lines • Show All 371 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll

	; RUN: opt < %s -loop-vectorize -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t \| FileCheck %s			; RUN: opt < %s -loop-vectorize -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S 2>%t \| FileCheck %s
	; RUN: cat %t \| FileCheck %s -check-prefix=CHECK-REMARKS			; RUN: cat %t \| FileCheck %s -check-prefix=CHECK-REMARKS
	target triple = "aarch64-linux-gnu"			target triple = "aarch64-linux-gnu"

	; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop			; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
	define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) {			define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) {
	; CHECK-LABEL: @loop_sve_i128			; CHECK-LABEL: @loop_sve_i128
	; CHECK: vector.body			; CHECK: vector.body
	; CHECK: %[[LOAD1:.*]] = load i128, i128* {{.*}}			; CHECK: %[[LOAD1:.*]] = load i128, i128* {{.*}}
	▲ Show 20 Lines • Show All 126 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll

	; REQUIRES: asserts			; REQUIRES: asserts
	; RUN: opt -loop-vectorize -S < %s -debug 2>%t \| FileCheck %s			; RUN: opt -loop-vectorize -S < %s -debug -prefer-predicate-over-epilogue=scalar-epilogue 2>%t \| FileCheck %s
	; RUN: cat %t \| FileCheck %s --check-prefix=DEBUG			; RUN: cat %t \| FileCheck %s --check-prefix=DEBUG

	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %indvars.iv1294 = phi i7 [ %indvars.iv.next1295, %for.body ], [ 0, %entry ]			; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %indvars.iv1294 = phi i7 [ %indvars.iv.next1295, %for.body ], [ 0, %entry ]
	; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %addi7 = add i7 %indvars.iv1294, 0			; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %addi7 = add i7 %indvars.iv1294, 0
	; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %indvars.iv.next1295 = add i7 %indvars.iv1294, 1			; DEBUG: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %indvars.iv.next1295 = add i7 %indvars.iv1294, 1

	▲ Show 20 Lines • Show All 89 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -dce -instcombine < %s -S \| FileCheck %s			; RUN: opt -loop-vectorize -force-target-instruction-cost=1 -dce -instcombine \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s -S \| FileCheck %s

	target triple = "aarch64-linux-gnu"			target triple = "aarch64-linux-gnu"

	; Test a case where the vectorised induction variable is used to			; Test a case where the vectorised induction variable is used to
	; generate a mask:			; generate a mask:
	; for (long long i = 0; i < n; i++) {			; for (long long i = 0; i < n; i++) {
	; if (i & 0x1)			; if (i & 0x1)
	; a[i] = b[i];			; a[i] = b[i];
	▲ Show 20 Lines • Show All 93 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll

	; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu < %s \| FileCheck %s			; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-linux-gnu \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s \| FileCheck %s

	define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) {			define void @invariant_load(i64 %n, i32* noalias nocapture %a, i32* nocapture readonly %b) {
	; CHECK-LABEL: @invariant_load			; CHECK-LABEL: @invariant_load
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42			; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
	; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, i32* %[[GEP]]			; CHECK-NEXT: %[[INVLOAD:.*]] = load i32, i32* %[[GEP]]
	; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[INVLOAD]], i32 0			; CHECK-NEXT: %[[SPLATINS:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[INVLOAD]], i32 0
	; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[SPLATINS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer			; CHECK-NEXT: %[[SPLAT:.*]] = shufflevector <vscale x 4 x i32> %[[SPLATINS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
	Show All 27 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll

	; RUN: opt -loop-vectorize -S < %s \| FileCheck %s			; RUN: opt -loop-vectorize -S -prefer-predicate-over-epilogue=scalar-epilogue < %s \| FileCheck %s

	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	define void @inv_store_i16(i16* noalias %dst, i16* noalias readonly %src, i64 %N) #0 {			define void @inv_store_i16(i16* noalias %dst, i16* noalias readonly %src, i64 %N) #0 {
	; CHECK-LABEL: @inv_store_i16(			; CHECK-LABEL: @inv_store_i16(
	; CHECK: vector.ph:			; CHECK: vector.ph:
	; CHECK: %[[TMP1:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %dst, i32 0			; CHECK: %[[TMP1:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %dst, i32 0
	; CHECK-NEXT: %[[SPLAT_PTRS:.*]] = shufflevector <vscale x 4 x i16*> %[[TMP1]], <vscale x 4 x i16*> poison, <vscale x 4 x i32> zeroinitializer			; CHECK-NEXT: %[[SPLAT_PTRS:.*]] = shufflevector <vscale x 4 x i16*> %[[TMP1]], <vscale x 4 x i16*> poison, <vscale x 4 x i32> zeroinitializer
	▲ Show 20 Lines • Show All 60 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll

	; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S <%s \| FileCheck %s			; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue <%s \| FileCheck %s

	define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {			define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
	; CHECK-LABEL: @stride7_i32(			; CHECK-LABEL: @stride7_i32(
	; CHECK: vector.body			; CHECK: vector.body
	; CHECK: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]			; CHECK: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
	; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)			; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
	; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, i32* %dst, <vscale x 4 x i64> %[[PTR_INDICES]]			; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, i32* %dst, <vscale x 4 x i64> %[[PTR_INDICES]]
	; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[PTRS]]			; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[PTRS]]
	▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll

	; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - \| FileCheck %s			; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -S %s -o - \| FileCheck %s

	define void @mloadstore_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {			define void @mloadstore_f32(float* noalias nocapture %a, float* noalias nocapture readonly %b, i64 %n) {
	; CHECK-LABEL: @mloadstore_f32			; CHECK-LABEL: @mloadstore_f32
	; CHECK: vector.body:			; CHECK: vector.body:
	; CHECK: %[[LOAD1:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>*			; CHECK: %[[LOAD1:.*]] = load <vscale x 4 x float>, <vscale x 4 x float>*
	; CHECK-NEXT: %[[MASK:.*]] = fcmp ogt <vscale x 4 x float> %[[LOAD1]],			; CHECK-NEXT: %[[MASK:.*]] = fcmp ogt <vscale x 4 x float> %[[LOAD1]],
	; CHECK-NEXT: %[[GEPA:.*]] = getelementptr float, float* %a,			; CHECK-NEXT: %[[GEPA:.*]] = getelementptr float, float* %a,
	; CHECK-NEXT: %[[MLOAD_PTRS:.*]] = bitcast float* %[[GEPA]] to <vscale x 4 x float>*			; CHECK-NEXT: %[[MLOAD_PTRS:.*]] = bitcast float* %[[GEPA]] to <vscale x 4 x float>*
	▲ Show 20 Lines • Show All 73 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -passes=loop-vectorize -mattr=+sve -S %s \| FileCheck %s			; RUN: opt -passes=loop-vectorize -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue -S %s \| FileCheck %s

	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	; Test case where the minimum profitable trip count due to runtime checks			; Test case where the minimum profitable trip count due to runtime checks
	; exceeds VF.getKnownMinValue() * UF.			; exceeds VF.getKnownMinValue() * UF.
	; FIXME: The code currently incorrectly is missing a umax(VF * UF, 28).			; FIXME: The code currently incorrectly is missing a umax(VF * UF, 28).
	define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr %src.1, ptr %src.2, i64 %n) {			define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr %src.1, ptr %src.2, i64 %n) {
	; CHECK-LABEL: @min_trip_count_due_to_runtime_checks_1(			; CHECK-LABEL: @min_trip_count_due_to_runtime_checks_1(
	▲ Show 20 Lines • Show All 131 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll

	; RUN: opt -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s \| FileCheck %s --check-prefix=CHECK-VF4IC1			; RUN: opt -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S \
	; RUN: opt -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s \| FileCheck %s --check-prefix=CHECK-VF4IC4			; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s \| FileCheck %s --check-prefix=CHECK-VF4IC1
				; RUN: opt -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s \| FileCheck %s --check-prefix=CHECK-VF4IC4

	target triple = "aarch64-linux-gnu"			target triple = "aarch64-linux-gnu"

	define i32 @select_const_i32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 {			define i32 @select_const_i32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 {
	; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp			; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp
	; CHECK-VF4IC1: vector.body:			; CHECK-VF4IC1: vector.body:
	; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]			; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
	; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>			; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
	▲ Show 20 Lines • Show All 194 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll

	; REQUIRES: asserts			; REQUIRES: asserts
	; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \			; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
	; RUN: -force-vector-width=4 -force-vector-interleave=1 -S 2>&1 \| FileCheck %s --check-prefix=CHECK-VF4			; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -force-vector-width=4 -force-vector-interleave=1 \
				; RUN: -S 2>&1 \| FileCheck %s --check-prefix=CHECK-VF4
	; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \			; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
	; RUN: -force-vector-width=8 -force-vector-interleave=1 -S 2>&1 \| FileCheck %s --check-prefix=CHECK-VF8			; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -force-vector-width=8 -force-vector-interleave=1 \
				; RUN: -S 2>&1 \| FileCheck %s --check-prefix=CHECK-VF8
	; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \			; RUN: opt < %s -loop-vectorize -debug -disable-output -force-ordered-reductions=true -hints-allow-reordering=false \
	; RUN: -force-vector-width=4 -force-vector-interleave=1 -mcpu=neoverse-n2 -S 2>&1 \| FileCheck %s --check-prefix=CHECK-VF4-CPU-NEOVERSE-N2			; RUN: -prefer-predicate-over-epilogue=scalar-epilogue -force-vector-width=4 -force-vector-interleave=1 \
				; RUN: -mcpu=neoverse-n2 -S 2>&1 \| FileCheck %s --check-prefix=CHECK-VF4-CPU-NEOVERSE-N2

	target triple="aarch64-unknown-linux-gnu"			target triple="aarch64-unknown-linux-gnu"

	; CHECK-VF4: Found an estimated cost of 16 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07			; CHECK-VF4: Found an estimated cost of 16 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07
	; CHECK-VF8: Found an estimated cost of 32 for VF vscale x 8 For instruction: %add = fadd float %0, %sum.07			; CHECK-VF8: Found an estimated cost of 32 for VF vscale x 8 For instruction: %add = fadd float %0, %sum.07
	; CHECK-VF4-CPU-NEOVERSE-N2: Found an estimated cost of 8 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07			; CHECK-VF4-CPU-NEOVERSE-N2: Found an estimated cost of 8 for VF vscale x 4 For instruction: %add = fadd float %0, %sum.07

	define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) #0 {			define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) #0 {
	▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll

	; This is the loop in c++ being vectorize in this file with			; This is the loop in c++ being vectorize in this file with
	; experimental.vector.reverse			; experimental.vector.reverse

	;#pragma clang loop vectorize_width(4, scalable)			;#pragma clang loop vectorize_width(4, scalable)
	; for (long int i = N - 1; i >= 0; i--)			; for (long int i = N - 1; i >= 0; i--)
	; {			; {
	; if (cond[i])			; if (cond[i])
	; a[i] += 1;			; a[i] += 1;
	; }			; }

	; The test checks if the mask is being correctly created, reverted and used			; The test checks if the mask is being correctly created, reverted and used

	; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s \| FileCheck %s			; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s \| FileCheck %s

	target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"			target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	define void @vector_reverse_mask_nxv4i1(double* %a, double* %cond, i64 %N) #0 {			define void @vector_reverse_mask_nxv4i1(double* %a, double* %cond, i64 %N) #0 {
	; CHECK-LABEL: vector.body:			; CHECK-LABEL: vector.body:
	; CHECK: %[[REVERSE6:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})			; CHECK: %[[REVERSE6:.*]] = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %{{.*}})
	; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0nxv4f64(<vscale x 4 x double>* %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)			; CHECK: %[[WIDEMSKLOAD:.*]] = call <vscale x 4 x double> @llvm.masked.load.nxv4f64.p0nxv4f64(<vscale x 4 x double>* %{{.*}}, i32 8, <vscale x 4 x i1> %[[REVERSE6]], <vscale x 4 x double> poison)
	Show All 39 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; This is the loop in c++ being vectorize in this file with			; This is the loop in c++ being vectorize in this file with
	;experimental.vector.reverse			;experimental.vector.reverse
	; #pragma clang loop vectorize_width(8, scalable)			; #pragma clang loop vectorize_width(8, scalable)
	; for (int i = N-1; i >= 0; --i)			; for (int i = N-1; i >= 0; --i)
	; a[i] = b[i] + 1.0;			; a[i] = b[i] + 1.0;

	; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s \| FileCheck %s			; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s \| FileCheck %s

	define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{			define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0{
	; CHECK-LABEL: @vector_reverse_f64(			; CHECK-LABEL: @vector_reverse_f64(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[A2:%.*]] = ptrtoint double* [[A:%.*]] to i64			; CHECK-NEXT: [[A2:%.*]] = ptrtoint double* [[A:%.*]] to i64
	; CHECK-NEXT: [[B1:%.*]] = ptrtoint double* [[B:%.*]] to i64			; CHECK-NEXT: [[B1:%.*]] = ptrtoint double* [[B:%.*]] to i64
	; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0			; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0
	; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]			; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
	▲ Show 20 Lines • Show All 188 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; REQUIRES: asserts			; REQUIRES: asserts
	; RUN: opt -loop-vectorize -S -mtriple=aarch64 -mattr=+sve -debug-only=loop-vectorize < %s 2>&1 \| FileCheck %s			; RUN: opt -loop-vectorize -S -mtriple=aarch64 -mattr=+sve -debug-only=loop-vectorize \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s 2>&1 \| FileCheck %s

	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	; CHECK-LABEL: LV: Checking a loop in 'pointer_induction_used_as_vector'			; CHECK-LABEL: LV: Checking a loop in 'pointer_induction_used_as_vector'
	; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, i8* %ptr.iv.2, i64 1			; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, i8* %ptr.iv.2, i64 1
	;			;
	; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {			; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
	; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count			; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
	▲ Show 20 Lines • Show All 198 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S < %s \| FileCheck %s			; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s \| FileCheck %s

	; Ensure that we can vectorize loops such as:			; Ensure that we can vectorize loops such as:
	; int *ptr = c;			; int *ptr = c;
	; for (long long i = 0; i < n; i++) {			; for (long long i = 0; i < n; i++) {
	; int X1 = *ptr++;			; int X1 = *ptr++;
	; int X2 = *ptr++;			; int X2 = *ptr++;
	; a[i] = X1 + 1;			; a[i] = X1 + 1;
	; b[i] = X2 + 1;			; b[i] = X2 + 1;
	▲ Show 20 Lines • Show All 396 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; This is the loop in c++ being vectorize in this file with			; This is the loop in c++ being vectorize in this file with
	; shuffle reverse			; shuffle reverse

	;#pragma clang loop vectorize_width(4, fixed)			;#pragma clang loop vectorize_width(4, fixed)
	; for (long int i = N - 1; i >= 0; i--)			; for (long int i = N - 1; i >= 0; i--)
	; {			; {
	; if (cond[i])			; if (cond[i])
	; a[i] += 1;			; a[i] += 1;
	; }			; }

	; The test checks if the mask is being correctly created, reverted and used			; The test checks if the mask is being correctly created, reverted and used

	; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S < %s \| FileCheck %s			; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -S \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s \| FileCheck %s

	target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"			target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
	target triple = "aarch64-unknown-linux-gnu"			target triple = "aarch64-unknown-linux-gnu"

	define void @vector_reverse_mask_v4i1(double* %a, double* %cond, i64 %N) #0 {			define void @vector_reverse_mask_v4i1(double* %a, double* %cond, i64 %N) #0 {
	; CHECK-LABEL: @vector_reverse_mask_v4i1(			; CHECK-LABEL: @vector_reverse_mask_v4i1(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0			; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i64 [[N:%.*]], 0
	▲ Show 20 Lines • Show All 114 Lines • Show Last 20 Lines

llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll

	; Test VLA for reverse with fixed size vector			; Test VLA for reverse with fixed size vector
	; This is the loop in c++ being vectorize in this file with			; This is the loop in c++ being vectorize in this file with
	; shuffle reverse			; shuffle reverse
	; #pragma clang loop vectorize_width(8, fixed)			; #pragma clang loop vectorize_width(8, fixed)
	; for (int i = N-1; i >= 0; --i)			; for (int i = N-1; i >= 0; --i)
	; a[i] = b[i] + 1.0;			; a[i] = b[i] + 1.0;

	; RUN: opt -loop-vectorize -dce -mtriple aarch64-linux-gnu -S < %s \| FileCheck %s			; RUN: opt -loop-vectorize -dce -mtriple aarch64-linux-gnu -S \
				; RUN: -prefer-predicate-over-epilogue=scalar-epilogue < %s \| FileCheck %s

	define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0 {			define void @vector_reverse_f64(i64 %N, double* %a, double* %b) #0 {
	; CHECK-LABEL: vector_reverse_f64			; CHECK-LABEL: vector_reverse_f64
	; CHECK-LABEL: vector.body			; CHECK-LABEL: vector.body
	; CHECK: %[[GEP:.*]] = getelementptr inbounds double, double* %{{.*}}, i32 0			; CHECK: %[[GEP:.*]] = getelementptr inbounds double, double* %{{.*}}, i32 0
	; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, double* %[[GEP]], i32 -7			; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, double* %[[GEP]], i32 -7
	; CHECK-NEXT: %[[CAST:.*]] = bitcast double* %[[GEP1]] to <8 x double>*			; CHECK-NEXT: %[[CAST:.*]] = bitcast double* %[[GEP1]] to <8 x double>*
	; CHECK-NEXT: %[[WIDE:.*]] = load <8 x double>, <8 x double>* %[[CAST]], align 8			; CHECK-NEXT: %[[WIDE:.*]] = load <8 x double>, <8 x double>* %[[CAST]], align 8
	▲ Show 20 Lines • Show All 70 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[NFC][LoopVectorize] Explicitly disable tail-folding on some SVE tests
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 446493

clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c

llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll

llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll

llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll

This is an archive of the discontinued LLVM Phabricator instance.

[NFC][LoopVectorize] Explicitly disable tail-folding on some SVE tests
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 446493

clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c

llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll

llvm/test/Transforms/LoopVectorize/AArch64/i1-reg-usage.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reduction-inloop-cond.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll

llvm/test/Transforms/LoopVectorize/AArch64/scalarize-store-with-predication.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-basic-vec.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-fneg.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions-unusual-types.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inductions.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-loads.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-masked-loadstore.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse-mask4.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll

llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll

[NFC][LoopVectorize] Explicitly disable tail-folding on some SVE tests
ClosedPublic