diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4378,6 +4378,32 @@
                                             (v4i32 VImm8000)))),
           (SQXTNv4i16 V128:$Vn)>;
 
+// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
+// with reversed min/max
+def : Pat<(v16i8 (concat_vectors
+                   (v8i8 V64:$Vd),
+                   (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
+                                      (v8i16 VImm7F)))))),
+          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+def : Pat<(v16i8 (concat_vectors
+                   (v8i8 V64:$Vd),
+                   (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
+                                      (v8i16 VImm80)))))),
+          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+
+// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
+// with reversed min/max
+def : Pat<(v8i16 (concat_vectors
+                   (v4i16 V64:$Vd),
+                   (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
+                                       (v4i32 VImm7FFF)))))),
+          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+def : Pat<(v8i16 (concat_vectors
+                   (v4i16 V64:$Vd),
+                   (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
+                                       (v4i32 VImm8000)))))),
+          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
+
 //===----------------------------------------------------------------------===//
 // Advanced SIMD three vector instructions.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll b/llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s
+
+; Test the (concat_vectors (X), (trunc(smin(smax(Y, -2^n), 2^n-1))) pattern.
+
+define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
+; CHECK-LABEL: test_combine_v8i16_to_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    sqxtn2 v0.16b, v1.8h
+; CHECK-NEXT:    ret
+entry:
+  %min = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %y, <8 x i16> <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>)
+  %max = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %min, <8 x i16> <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>)
+  %trunc = trunc <8 x i16> %max to <8 x i8>
+  %shuffle = shufflevector <8 x i8> %x, <8 x i8> %trunc, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %shuffle
+}
+
+define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_combine_v4i32_to_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    sqxtn2 v0.8h, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %max = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+  %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %max, <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
+  %trunc = trunc <4 x i32> %min to <4 x i16>
+  %shuffle = shufflevector <4 x i16> %x, <4 x i16> %trunc, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle
+}
+
+declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -315,17 +315,12 @@
 define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-CVT-LABEL: stest_f16i16:
 ; CHECK-CVT:       // %bb.0: // %entry
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #127, msl #8
-; CHECK-CVT-NEXT:    mvni v3.4s, #127, msl #8
-; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    smax v1.4s, v2.4s, v3.4s
-; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v3.4s
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-CVT-NEXT:    fcvtzs v2.4s, v0.4s
+; CHECK-CVT-NEXT:    sqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT:    sqxtn2 v0.8h, v2.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: stest_f16i16:
@@ -1028,17 +1023,12 @@
 define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-CVT-LABEL: stest_f16i16_mm:
 ; CHECK-CVT:       // %bb.0: // %entry
-; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT:    movi v1.4s, #127, msl #8
-; CHECK-CVT-NEXT:    mvni v3.4s, #127, msl #8
-; CHECK-CVT-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-CVT-NEXT:    fcvtzs v0.4s, v0.4s
-; CHECK-CVT-NEXT:    smin v2.4s, v2.4s, v1.4s
-; CHECK-CVT-NEXT:    smin v0.4s, v0.4s, v1.4s
-; CHECK-CVT-NEXT:    smax v1.4s, v2.4s, v3.4s
-; CHECK-CVT-NEXT:    smax v0.4s, v0.4s, v3.4s
-; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-CVT-NEXT:    fcvtl v1.4s, v0.4h
+; CHECK-CVT-NEXT:    fcvtl2 v0.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-CVT-NEXT:    fcvtzs v2.4s, v0.4s
+; CHECK-CVT-NEXT:    sqxtn v0.4h, v1.4s
+; CHECK-CVT-NEXT:    sqxtn2 v0.8h, v2.4s
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: stest_f16i16_mm: