diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11107,6 +11107,12 @@
   if (VT.isScalableVector())
     return performSVEAndCombine(N, DCI);
 
+  // The combining code below works only for 128-bit vectors, as it uses a NEON
+  // instruction. In particular, it does not work for VLS SVE code generation
+  // when dealing with vectors wider than 128 bits.
+  if (VT.getSizeInBits() > 128)
+    return SDValue();
+
   BuildVectorSDNode *BVN =
       dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
   if (!BVN)
diff --git a/llvm/test/CodeGen/AArch64/vls-sve-128-and.ll b/llvm/test/CodeGen/AArch64/vls-sve-128-and.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vls-sve-128-and.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 -o - -asm-verbose=0 < %s | FileCheck %s
+
+; CHECK-LABEL: vls_sve_and_16xi8:
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+define <16 x i8> @vls_sve_and_16xi8(<16 x i8> %a, <16 x i8> %b) nounwind {
+  %c = and <16 x i8> %a, %b
+  ret <16 x i8> %c
+}
+
+; CHECK-LABEL: vls_sve_and_8xi16:
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+define <8 x i16> @vls_sve_and_8xi16(<8 x i16> %a, <8 x i16> %b) nounwind {
+  %c = and <8 x i16> %a, %b
+  ret <8 x i16> %c
+}
+
+; CHECK-LABEL: vls_sve_and_4xi32:
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+define <4 x i32> @vls_sve_and_4xi32(<4 x i32> %a, <4 x i32> %b) nounwind {
+  %c = and <4 x i32> %a, %b
+  ret <4 x i32> %c
+}
+
+; CHECK-LABEL: vls_sve_and_2xi64:
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+define <2 x i64> @vls_sve_and_2xi64(<2 x i64> %a, <2 x i64> %b) nounwind {
+  %c = and <2 x i64> %a, %b
+  ret <2 x i64> %c
+}
diff --git a/llvm/test/CodeGen/AArch64/vls-sve-256-and.ll b/llvm/test/CodeGen/AArch64/vls-sve-256-and.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vls-sve-256-and.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 -o - -asm-verbose=0 < %s | FileCheck %s
+
+; CHECK-LABEL: vls_sve_and_32xi8:
+; CHECK-NEXT: ptrue p0.b, vl32
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: st1b { z0.b }, p0, [x2]
+; CHECK-NEXT: ret
+define void @vls_sve_and_32xi8(<32 x i8>* %ap, <32 x i8>* %bp, <32 x i8>* %out) nounwind {
+  %a = load <32 x i8>, <32 x i8>* %ap
+  %b = load <32 x i8>, <32 x i8>* %bp
+  %c = and <32 x i8> %a, %b
+  store <32 x i8> %c, <32 x i8>* %out
+  ret void
+}
+
+; CHECK-LABEL: vls_sve_and_16xi16:
+; CHECK-NEXT: ptrue p0.h, vl16
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: st1h { z0.h }, p0, [x2]
+; CHECK-NEXT: ret
+define void @vls_sve_and_16xi16(<16 x i16>* %ap, <16 x i16>* %bp, <16 x i16>* %out) nounwind {
+  %a = load <16 x i16>, <16 x i16>* %ap
+  %b = load <16 x i16>, <16 x i16>* %bp
+  %c = and <16 x i16> %a, %b
+  store <16 x i16> %c, <16 x i16>* %out
+  ret void
+}
+
+; CHECK-LABEL: vls_sve_and_8xi32:
+; CHECK-NEXT: ptrue p0.s, vl8
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: st1w { z0.s }, p0, [x2]
+; CHECK-NEXT: ret
+define void @vls_sve_and_8xi32(<8 x i32>* %ap, <8 x i32>* %bp, <8 x i32>* %out) nounwind {
+  %a = load <8 x i32>, <8 x i32>* %ap
+  %b = load <8 x i32>, <8 x i32>* %bp
+  %c = and <8 x i32> %a, %b
+  store <8 x i32> %c, <8 x i32>* %out
+  ret void
+}
+
+; CHECK-LABEL: vls_sve_and_4xi64:
+; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x2]
+; CHECK-NEXT: ret
+define void @vls_sve_and_4xi64(<4 x i64>* %ap, <4 x i64>* %bp, <4 x i64>* %out) nounwind {
+  %a = load <4 x i64>, <4 x i64>* %ap
+  %b = load <4 x i64>, <4 x i64>* %bp
+  %c = and <4 x i64> %a, %b
+  store <4 x i64> %c, <4 x i64>* %out
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/vls-sve-512-and.ll b/llvm/test/CodeGen/AArch64/vls-sve-512-and.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vls-sve-512-and.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=512 -o - -asm-verbose=0 < %s | FileCheck %s
+
+; CHECK-LABEL: vls_sve_and_64xi8:
+; CHECK-NEXT: ptrue p0.b, vl64
+; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: st1b { z0.b }, p0, [x2]
+; CHECK-NEXT: ret
+define void @vls_sve_and_64xi8(<64 x i8>* %ap, <64 x i8>* %bp, <64 x i8>* %out) nounwind {
+  %a = load <64 x i8>, <64 x i8>* %ap
+  %b = load <64 x i8>, <64 x i8>* %bp
+  %c = and <64 x i8> %a, %b
+  store <64 x i8> %c, <64 x i8>* %out
+  ret void
+}
+
+; CHECK-LABEL: vls_sve_and_32xi16:
+; CHECK-NEXT: ptrue p0.h, vl32
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: st1h { z0.h }, p0, [x2]
+; CHECK-NEXT: ret
+define void @vls_sve_and_32xi16(<32 x i16>* %ap, <32 x i16>* %bp, <32 x i16>* %out) nounwind {
+  %a = load <32 x i16>, <32 x i16>* %ap
+  %b = load <32 x i16>, <32 x i16>* %bp
+  %c = and <32 x i16> %a, %b
+  store <32 x i16> %c, <32 x i16>* %out
+  ret void
+}
+
+; CHECK-LABEL: vls_sve_and_16xi32:
+; CHECK-NEXT: ptrue p0.s, vl16
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: st1w { z0.s }, p0, [x2]
+; CHECK-NEXT: ret
+define void @vls_sve_and_16xi32(<16 x i32>* %ap, <16 x i32>* %bp, <16 x i32>* %out) nounwind {
+  %a = load <16 x i32>, <16 x i32>* %ap
+  %b = load <16 x i32>, <16 x i32>* %bp
+  %c = and <16 x i32> %a, %b
+  store <16 x i32> %c, <16 x i32>* %out
+  ret void
+}
+
+; CHECK-LABEL: vls_sve_and_8xi64:
+; CHECK-NEXT: ptrue p0.d, vl8
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: and z0.d, z0.d, z1.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x2]
+; CHECK-NEXT: ret
+define void @vls_sve_and_8xi64(<8 x i64>* %ap, <8 x i64>* %bp, <8 x i64>* %out) nounwind {
+  %a = load <8 x i64>, <8 x i64>* %ap
+  %b = load <8 x i64>, <8 x i64>* %bp
+  %c = and <8 x i64> %a, %b
+  store <8 x i64> %c, <8 x i64>* %out
+  ret void
+}