diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
+
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8>) #0
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8>) #0
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16>) #0
+declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>) #0
+declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>) #0
+
+define void @insert_vec_v2i32_uaddlv_from_v8i16(ptr %0) {
+; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v8i16:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: uaddlv.8h s0, v0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: mov.s v1[0], w8
+; CHECK-NEXT: ucvtf.2s v0, v1
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
+ %1 = insertelement <2 x i32> zeroinitializer, i32 %vaddlv, i64 0
+ %2 = uitofp <2 x i32> %1 to <2 x float>
+ store <2 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v4i32_uaddlv_from_v8i16(ptr %0) {
+; CHECK-LABEL: insert_vec_v4i32_uaddlv_from_v8i16:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: uaddlv.8h s1, v0
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov.s v0[0], w8
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
+ %1 = insertelement <4 x i32> zeroinitializer, i32 %vaddlv, i64 0
+ %2 = uitofp <4 x i32> %1 to <4 x float>
+ store <4 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v16i32_uaddlv_from_v8i16(ptr %0) {
+; CHECK-LABEL: insert_vec_v16i32_uaddlv_from_v8i16:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: uaddlv.8h s1, v0
+; CHECK-NEXT: stp q0, q0, [x0, #32]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov.s v2[0], w8
+; CHECK-NEXT: ucvtf.4s v1, v2
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
+ %1 = insertelement <16 x i32> zeroinitializer, i32 %vaddlv, i64 0
+ %2 = uitofp <16 x i32> %1 to <16 x float>
+ store <16 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v23i32_uaddlv_from_v8i16(ptr %0) {
+; CHECK-LABEL: insert_vec_v23i32_uaddlv_from_v8i16:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: uaddlv.8h s1, v0
+; CHECK-NEXT: stp q0, q0, [x0, #16]
+; CHECK-NEXT: stp q0, q0, [x0, #48]
+; CHECK-NEXT: str d0, [x0, #80]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov.s v2[0], w8
+; CHECK-NEXT: add x8, x0, #88
+; CHECK-NEXT: st1.s { v0 }[2], [x8]
+; CHECK-NEXT: ucvtf.4s v1, v2
+; CHECK-NEXT: str q1, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
+ %1 = insertelement <23 x i32> zeroinitializer, i32 %vaddlv, i64 0
+ %2 = uitofp <23 x i32> %1 to <23 x float>
+ store <23 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v2i32_uaddlv_from_v16i8(ptr %0) {
+; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v16i8:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: uaddlv.16b h0, v0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: mov.s v1[0], w8
+; CHECK-NEXT: ucvtf.2s v0, v1
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> zeroinitializer)
+ %1 = insertelement <2 x i32> zeroinitializer, i32 %vaddlv, i64 0
+ %2 = uitofp <2 x i32> %1 to <2 x float>
+ store <2 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v2i32_uaddlv_from_v8i8(ptr %0) {
+; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v8i8:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: uaddlv.8b h1, v0
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov.s v0[0], w8
+; CHECK-NEXT: ucvtf.2s v0, v0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> zeroinitializer)
+ %1 = insertelement <2 x i32> zeroinitializer, i32 %vaddlv, i64 0
+ %2 = uitofp <2 x i32> %1 to <2 x float>
+ store <2 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v2i32_uaddlv_from_v4i16(ptr %0) {
+; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v4i16:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: uaddlv.4h s1, v0
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov.s v0[0], w8
+; CHECK-NEXT: ucvtf.2s v0, v0
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> zeroinitializer)
+ %1 = insertelement <2 x i32> zeroinitializer, i32 %vaddlv, i64 0
+ %2 = uitofp <2 x i32> %1 to <2 x float>
+ store <2 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) {
+; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: uaddlv.4s d2, v1
+; CHECK-NEXT: fmov x8, d2
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: mov.d v1[0], x8
+; CHECK-NEXT: str d2, [x0, #16]
+; CHECK-NEXT: ucvtf.2d v1, v1
+; CHECK-NEXT: fcvtn v1.2s, v1.2d
+; CHECK-NEXT: mov.d v1[1], v0[0]
+; CHECK-NEXT: str q1, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
+ %1 = insertelement <6 x i64> zeroinitializer, i64 %vaddlv, i64 0
+ %2 = uitofp <6 x i64> %1 to <6 x float>
+ store <6 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v2i64_uaddlv_from_v4i32(ptr %0) {
+; CHECK-LABEL: insert_vec_v2i64_uaddlv_from_v4i32:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: uaddlv.4s d1, v0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: mov.d v0[0], x8
+; CHECK-NEXT: ucvtf.2d v0, v0
+; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
+ %1 = insertelement <2 x i64> zeroinitializer, i64 %vaddlv, i64 0
+ %2 = uitofp <2 x i64> %1 to <2 x float>
+ store <2 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v5i64_uaddlv_from_v4i32(ptr %0) {
+; CHECK-LABEL: insert_vec_v5i64_uaddlv_from_v4i32:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: str wzr, [x0, #16]
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: uaddlv.4s d2, v1
+; CHECK-NEXT: fmov x8, d2
+; CHECK-NEXT: mov.d v1[0], x8
+; CHECK-NEXT: ucvtf.2d v1, v1
+; CHECK-NEXT: fcvtn v1.2s, v1.2d
+; CHECK-NEXT: mov.d v1[1], v0[0]
+; CHECK-NEXT: str q1, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
+ %1 = insertelement <5 x i64> zeroinitializer, i64 %vaddlv, i64 0
+ %2 = uitofp <5 x i64> %1 to <5 x float>
+ store <5 x float> %2, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) {
+; CHECK-LABEL: insert_vec_v8i16_uaddlv_from_v8i16:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: stp xzr, xzr, [x0, #16]
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: uaddlv.8h s0, v0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: mov.h v1[0], w8
+; CHECK-NEXT: ushll.4s v0, v1, #0
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
+ %1 = trunc i32 %vaddlv to i16
+ %2 = insertelement <8 x i16> zeroinitializer, i16 %1, i64 0
+ %3 = uitofp <8 x i16> %2 to <8 x float>
+ store <8 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) {
+; CHECK-LABEL: insert_vec_v3i16_uaddlv_from_v8i16:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: uaddlv.8h s0, v0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: mov.h v1[0], w8
+; CHECK-NEXT: add x8, x0, #8
+; CHECK-NEXT: ushll.4s v0, v1, #0
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: st1.s { v0 }[2], [x8]
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
+ %1 = trunc i32 %vaddlv to i16
+ %2 = insertelement <3 x i16> zeroinitializer, i16 %1, i64 0
+ %3 = uitofp <3 x i16> %2 to <3 x float>
+ store <3 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v16i64_uaddlv_from_v4i16(ptr %0) {
+; CHECK-LABEL: insert_vec_v16i64_uaddlv_from_v4i16:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: movi d0, #0000000000000000
+; CHECK-NEXT: movi.2d v3, #0000000000000000
+; CHECK-NEXT: uaddlv.4h s2, v1
+; CHECK-NEXT: stp q1, q1, [x0, #32]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov.s v3[0], w8
+; CHECK-NEXT: ucvtf.2d v2, v3
+; CHECK-NEXT: fcvtn v2.2s, v2.2d
+; CHECK-NEXT: mov.d v2[1], v0[0]
+; CHECK-NEXT: stp q2, q1, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> zeroinitializer)
+ %1 = zext i32 %vaddlv to i64
+ %2 = insertelement <16 x i64> zeroinitializer, i64 %1, i64 0
+ %3 = uitofp <16 x i64> %2 to <16 x float>
+ store <16 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
+; CHECK-LABEL: insert_vec_v16i8_uaddlv_from_v8i8:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: uaddlv.8b h1, v0
+; CHECK-NEXT: stp q0, q0, [x0, #32]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov.b v2[0], w8
+; CHECK-NEXT: zip1.8b v1, v2, v0
+; CHECK-NEXT: bic.4h v1, #255, lsl #8
+; CHECK-NEXT: ushll.4s v1, v1, #0
+; CHECK-NEXT: ucvtf.4s v1, v1
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> zeroinitializer)
+ %1 = trunc i32 %vaddlv to i8
+ %2 = insertelement <16 x i8> zeroinitializer, i8 %1, i64 0
+ %3 = uitofp <16 x i8> %2 to <16 x float>
+ store <16 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v8i8_uaddlv_from_v8i8(ptr %0) {
+; CHECK-LABEL: insert_vec_v8i8_uaddlv_from_v8i8:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: stp xzr, xzr, [x0, #16]
+; CHECK-NEXT: uaddlv.8b h1, v0
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov.h v0[0], w8
+; CHECK-NEXT: bic.4h v0, #255, lsl #8
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> zeroinitializer)
+ %1 = trunc i32 %vaddlv to i8
+ %2 = insertelement <8 x i8> zeroinitializer, i8 %1, i64 0
+ %3 = uitofp <8 x i8> %2 to <8 x float>
+ store <8 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v12i16_uaddlv_from_v4i16(ptr %0) {
+; CHECK-LABEL: insert_vec_v12i16_uaddlv_from_v4i16:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: stp xzr, xzr, [x0, #16]
+; CHECK-NEXT: stp xzr, xzr, [x0, #32]
+; CHECK-NEXT: uaddlv.4h s1, v0
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov.h v0[0], w8
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> zeroinitializer)
+ %1 = trunc i32 %vaddlv to i16
+ %2 = insertelement <12 x i16> zeroinitializer, i16 %1, i64 0
+ %3 = uitofp <12 x i16> %2 to <12 x float>
+ store <12 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v8i32_uaddlv_from_v4i32(ptr %0) {
+; CHECK-LABEL: insert_vec_v8i32_uaddlv_from_v4i32:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: stp xzr, xzr, [x0, #16]
+; CHECK-NEXT: uaddlv.4s d1, v0
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: mov.s v0[0], w8
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
+ %1 = trunc i64 %vaddlv to i32
+ %2 = insertelement <8 x i32> zeroinitializer, i32 %1, i64 0
+ %3 = uitofp <8 x i32> %2 to <8 x float>
+ store <8 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v16i32_uaddlv_from_v4i32(ptr %0) {
+; CHECK-LABEL: insert_vec_v16i32_uaddlv_from_v4i32:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: uaddlv.4s d1, v0
+; CHECK-NEXT: stp q0, q0, [x0, #32]
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: mov.s v2[0], w8
+; CHECK-NEXT: ucvtf.4s v1, v2
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
+ %1 = trunc i64 %vaddlv to i32
+ %2 = insertelement <16 x i32> zeroinitializer, i32 %1, i64 0
+ %3 = uitofp <16 x i32> %2 to <16 x float>
+ store <16 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v4i16_uaddlv_from_v4i32(ptr %0) {
+; CHECK-LABEL: insert_vec_v4i16_uaddlv_from_v4i32:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: uaddlv.4s d0, v0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: mov.h v1[0], w8
+; CHECK-NEXT: ushll.4s v0, v1, #0
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
+ %1 = trunc i64 %vaddlv to i16
+ %2 = insertelement <4 x i16> zeroinitializer, i16 %1, i64 0
+ %3 = uitofp <4 x i16> %2 to <4 x float>
+ store <4 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v16i16_uaddlv_from_v4i32(ptr %0) {
+; CHECK-LABEL: insert_vec_v16i16_uaddlv_from_v4i32:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: uaddlv.4s d0, v0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: mov.h v1[0], w8
+; CHECK-NEXT: stp q0, q0, [x0, #32]
+; CHECK-NEXT: ushll.4s v1, v1, #0
+; CHECK-NEXT: ucvtf.4s v1, v1
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
+ %1 = trunc i64 %vaddlv to i16
+ %2 = insertelement <16 x i16> zeroinitializer, i16 %1, i64 0
+ %3 = uitofp <16 x i16> %2 to <16 x float>
+ store <16 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) {
+; CHECK-LABEL: insert_vec_v8i8_uaddlv_from_v4i32:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: stp xzr, xzr, [x0, #16]
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: uaddlv.4s d0, v0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: mov.h v1[0], w8
+; CHECK-NEXT: bic.4h v1, #255, lsl #8
+; CHECK-NEXT: ushll.4s v0, v1, #0
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
+ %1 = trunc i64 %vaddlv to i8
+ %2 = insertelement <8 x i8> zeroinitializer, i8 %1, i64 0
+ %3 = uitofp <8 x i8> %2 to <8 x float>
+ store <8 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) {
+; CHECK-LABEL: insert_vec_v16i8_uaddlv_from_v4i32:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: uaddlv.4s d0, v0
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: mov.b v1[0], w8
+; CHECK-NEXT: zip1.8b v0, v1, v0
+; CHECK-NEXT: movi.2d v1, #0000000000000000
+; CHECK-NEXT: bic.4h v0, #255, lsl #8
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: stp q1, q1, [x0, #32]
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> zeroinitializer)
+ %1 = trunc i64 %vaddlv to i8
+ %2 = insertelement <16 x i8> zeroinitializer, i8 %1, i64 0
+ %3 = uitofp <16 x i8> %2 to <16 x float>
+ store <16 x float> %3, ptr %0, align 8
+ ret void
+}
+
+define void @insert_vec_v2i32_uaddlv_from_v8i16_nz_index(ptr %0) {
+; CHECK-LABEL: insert_vec_v2i32_uaddlv_from_v8i16_nz_index:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: uaddlv.8h s1, v0
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov.s v0[2], w8
+; CHECK-NEXT: ucvtf.4s v0, v0
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+
+entry:
+ %vaddlv = tail call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> zeroinitializer)
+ %1 = insertelement <4 x i32> zeroinitializer, i32 %vaddlv, i64 2
+ %2 = uitofp <4 x i32> %1 to <4 x float>
+ store <4 x float> %2, ptr %0, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/peephole-insvigpr.mir
@@ -0,0 +1,407 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-mi-peephole-opt -mtriple=aarch64-unknown-linux -verify-machineinstrs -o - %s | FileCheck %s
+--- |
+ source_filename = "/Users/nilanjana/Documents/code/llvm-project/llvm/test/CodeGen/AArch64/tmp.ll"
+
+ ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+ declare i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16>) #0
+
+ ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
+ declare i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32>) #0
+
+ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) {
+ entry:
+ ret void
+ }
+
+ define void @insert_vec_v2i32_uaddlv_from_v8i16(ptr %0) {
+ entry:
+ ret void
+ }
+
+ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) {
+ entry:
+ ret void
+ }
+
+ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) {
+ entry:
+ ret void
+ }
+
+ define void @insert_vec_v2i32_uaddlv_from_v8i16_nz_index(ptr %0) {
+ entry:
+ ret void
+ }
+
+ ; The optimization is not applicable when the source is not a virtual register
+ define void @insert_vec_from_gpr(i32 %v, ptr %p) {
+ entry:
+ ret void
+ }
+
+ attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
+
+...
+---
+name: insert_vec_v6i64_uaddlv_from_v4i32
+registers:
+ - { id: 0, class: gpr64common, preferred-register: '' }
+ - { id: 1, class: fpr128, preferred-register: '' }
+ - { id: 2, class: fpr64, preferred-register: '' }
+ - { id: 3, class: fpr128, preferred-register: '' }
+ - { id: 4, class: fpr128, preferred-register: '' }
+ - { id: 5, class: gpr64, preferred-register: '' }
+ - { id: 6, class: fpr128, preferred-register: '' }
+ - { id: 7, class: fpr128, preferred-register: '' }
+ - { id: 8, class: fpr64, preferred-register: '' }
+ - { id: 9, class: fpr128, preferred-register: '' }
+ - { id: 10, class: fpr128, preferred-register: '' }
+ - { id: 11, class: fpr128, preferred-register: '' }
+ - { id: 12, class: fpr64, preferred-register: '' }
+ - { id: 13, class: fpr128, preferred-register: '' }
+ - { id: 14, class: fpr128, preferred-register: '' }
+ - { id: 15, class: fpr128, preferred-register: '' }
+ - { id: 16, class: gpr64all, preferred-register: '' }
+ - { id: 17, class: fpr64, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: insert_vec_v6i64_uaddlv_from_v4i32
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+ ; CHECK-NEXT: [[UADDLVv4i32v:%[0-9]+]]:fpr64 = UADDLVv4i32v [[MOVIv2d_ns]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[UADDLVv4i32v]], %subreg.dsub
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[INSERT_SUBREG]].dsub
+ ; CHECK-NEXT: [[INSvi64gpr:%[0-9]+]]:fpr128 = INSvi64gpr [[MOVIv2d_ns]], 0, killed [[COPY1]]
+ ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 0
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[MOVID]], %subreg.dsub
+ ; CHECK-NEXT: [[UCVTFv2f64_:%[0-9]+]]:fpr128 = nofpexcept UCVTFv2f64 killed [[INSvi64gpr]], implicit $fpcr
+ ; CHECK-NEXT: [[FCVTNv2i32_:%[0-9]+]]:fpr64 = nofpexcept FCVTNv2i32 killed [[UCVTFv2f64_]], implicit $fpcr
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], killed [[FCVTNv2i32_]], %subreg.dsub
+ ; CHECK-NEXT: [[INSvi64lane:%[0-9]+]]:fpr128 = INSvi64lane [[INSERT_SUBREG2]], 1, killed [[INSERT_SUBREG1]], 0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[MOVIv2d_ns]].dsub
+ ; CHECK-NEXT: STRDui killed [[COPY2]], [[COPY]], 2 :: (store (s64) into %ir.0 + 16)
+ ; CHECK-NEXT: STRQui killed [[INSvi64lane]], [[COPY]], 0 :: (store (s128) into %ir.0, align 8)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64common = COPY $x0
+ %1:fpr128 = MOVIv2d_ns 0
+ %2:fpr64 = UADDLVv4i32v %1
+ %4:fpr128 = IMPLICIT_DEF
+ %3:fpr128 = INSERT_SUBREG %4, killed %2, %subreg.dsub
+ %5:gpr64 = COPY %3.dsub
+ %7:fpr128 = INSvi64gpr %1, 0, killed %5
+ %8:fpr64 = MOVID 0
+ %10:fpr128 = IMPLICIT_DEF
+ %9:fpr128 = INSERT_SUBREG %10, killed %8, %subreg.dsub
+ %11:fpr128 = nofpexcept UCVTFv2f64 killed %7, implicit $fpcr
+ %12:fpr64 = nofpexcept FCVTNv2i32 killed %11, implicit $fpcr
+ %14:fpr128 = IMPLICIT_DEF
+ %13:fpr128 = INSERT_SUBREG %14, killed %12, %subreg.dsub
+ %15:fpr128 = INSvi64lane %13, 1, killed %9, 0
+ %17:fpr64 = COPY %1.dsub
+ STRDui killed %17, %0, 2 :: (store (s64) into %ir.0 + 16)
+ STRQui killed %15, %0, 0 :: (store (s128) into %ir.0, align 8)
+ RET_ReallyLR
+
+...
+---
+name: insert_vec_v2i32_uaddlv_from_v8i16
+registers:
+ - { id: 0, class: gpr64common, preferred-register: '' }
+ - { id: 1, class: fpr128, preferred-register: '' }
+ - { id: 2, class: fpr32, preferred-register: '' }
+ - { id: 3, class: fpr128, preferred-register: '' }
+ - { id: 4, class: fpr128, preferred-register: '' }
+ - { id: 5, class: gpr32, preferred-register: '' }
+ - { id: 6, class: fpr64, preferred-register: '' }
+ - { id: 7, class: fpr128, preferred-register: '' }
+ - { id: 8, class: fpr128, preferred-register: '' }
+ - { id: 9, class: fpr128, preferred-register: '' }
+ - { id: 10, class: fpr64, preferred-register: '' }
+ - { id: 11, class: fpr64, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: insert_vec_v2i32_uaddlv_from_v8i16
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+ ; CHECK-NEXT: [[UADDLVv8i16v:%[0-9]+]]:fpr32 = UADDLVv8i16v killed [[MOVIv2d_ns]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[UADDLVv8i16v]], %subreg.ssub
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].ssub
+ ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 0
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[MOVID]], %subreg.dsub
+ ; CHECK-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 0, killed [[COPY1]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub
+ ; CHECK-NEXT: [[UCVTFv2f32_:%[0-9]+]]:fpr64 = nofpexcept UCVTFv2f32 killed [[COPY2]], implicit $fpcr
+ ; CHECK-NEXT: STRDui killed [[UCVTFv2f32_]], [[COPY]], 0 :: (store (s64) into %ir.0)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64common = COPY $x0
+ %1:fpr128 = MOVIv2d_ns 0
+ %2:fpr32 = UADDLVv8i16v killed %1
+ %4:fpr128 = IMPLICIT_DEF
+ %3:fpr128 = INSERT_SUBREG %4, killed %2, %subreg.ssub
+ %5:gpr32 = COPY %3.ssub
+ %6:fpr64 = MOVID 0
+ %8:fpr128 = IMPLICIT_DEF
+ %7:fpr128 = INSERT_SUBREG %8, killed %6, %subreg.dsub
+ %9:fpr128 = INSvi32gpr %7, 0, killed %5
+ %10:fpr64 = COPY %9.dsub
+ %11:fpr64 = nofpexcept UCVTFv2f32 killed %10, implicit $fpcr
+ STRDui killed %11, %0, 0 :: (store (s64) into %ir.0)
+ RET_ReallyLR
+
+...
+---
+name: insert_vec_v8i16_uaddlv_from_v8i16
+registers:
+ - { id: 0, class: gpr64common, preferred-register: '' }
+ - { id: 1, class: fpr128, preferred-register: '' }
+ - { id: 2, class: fpr32, preferred-register: '' }
+ - { id: 3, class: fpr128, preferred-register: '' }
+ - { id: 4, class: fpr128, preferred-register: '' }
+ - { id: 5, class: gpr32, preferred-register: '' }
+ - { id: 6, class: fpr64, preferred-register: '' }
+ - { id: 7, class: fpr128, preferred-register: '' }
+ - { id: 8, class: fpr128, preferred-register: '' }
+ - { id: 9, class: fpr128, preferred-register: '' }
+ - { id: 10, class: fpr64, preferred-register: '' }
+ - { id: 11, class: fpr128, preferred-register: '' }
+ - { id: 12, class: fpr128, preferred-register: '' }
+ - { id: 13, class: gpr32, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: insert_vec_v8i16_uaddlv_from_v8i16
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+ ; CHECK-NEXT: [[UADDLVv8i16v:%[0-9]+]]:fpr32 = UADDLVv8i16v killed [[MOVIv2d_ns]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[UADDLVv8i16v]], %subreg.ssub
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].ssub
+ ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 0
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[MOVID]], %subreg.dsub
+ ; CHECK-NEXT: [[INSvi16gpr:%[0-9]+]]:fpr128 = INSvi16gpr [[INSERT_SUBREG1]], 0, killed [[COPY1]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY [[INSvi16gpr]].dsub
+ ; CHECK-NEXT: [[USHLLv4i16_shift:%[0-9]+]]:fpr128 = USHLLv4i16_shift killed [[COPY2]], 0
+ ; CHECK-NEXT: [[UCVTFv4f32_:%[0-9]+]]:fpr128 = nofpexcept UCVTFv4f32 killed [[USHLLv4i16_shift]], implicit $fpcr
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: STRWui [[COPY3]], [[COPY]], 7 :: (store (s32) into %ir.0 + 28)
+ ; CHECK-NEXT: STRWui [[COPY3]], [[COPY]], 6 :: (store (s32) into %ir.0 + 24, align 8)
+ ; CHECK-NEXT: STRWui [[COPY3]], [[COPY]], 5 :: (store (s32) into %ir.0 + 20)
+ ; CHECK-NEXT: STRWui [[COPY3]], [[COPY]], 4 :: (store (s32) into %ir.0 + 16, align 8)
+ ; CHECK-NEXT: STRQui killed [[UCVTFv4f32_]], [[COPY]], 0 :: (store (s128) into %ir.0, align 8)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64common = COPY $x0
+ %1:fpr128 = MOVIv2d_ns 0
+ %2:fpr32 = UADDLVv8i16v killed %1
+ %4:fpr128 = IMPLICIT_DEF
+ %3:fpr128 = INSERT_SUBREG %4, killed %2, %subreg.ssub
+ %5:gpr32 = COPY %3.ssub
+ %6:fpr64 = MOVID 0
+ %8:fpr128 = IMPLICIT_DEF
+ %7:fpr128 = INSERT_SUBREG %8, killed %6, %subreg.dsub
+ %9:fpr128 = INSvi16gpr %7, 0, killed %5
+ %10:fpr64 = COPY %9.dsub
+ %11:fpr128 = USHLLv4i16_shift killed %10, 0
+ %12:fpr128 = nofpexcept UCVTFv4f32 killed %11, implicit $fpcr
+ %13:gpr32 = COPY $wzr
+ STRWui %13, %0, 7 :: (store (s32) into %ir.0 + 28)
+ STRWui %13, %0, 6 :: (store (s32) into %ir.0 + 24, align 8)
+ STRWui %13, %0, 5 :: (store (s32) into %ir.0 + 20)
+ STRWui %13, %0, 4 :: (store (s32) into %ir.0 + 16, align 8)
+ STRQui killed %12, %0, 0 :: (store (s128) into %ir.0, align 8)
+ RET_ReallyLR
+
+...
+---
+name: insert_vec_v16i8_uaddlv_from_v4i32
+registers:
+ - { id: 0, class: gpr64common, preferred-register: '' }
+ - { id: 1, class: fpr128, preferred-register: '' }
+ - { id: 2, class: fpr64, preferred-register: '' }
+ - { id: 3, class: fpr128, preferred-register: '' }
+ - { id: 4, class: fpr128, preferred-register: '' }
+ - { id: 5, class: gpr64all, preferred-register: '' }
+ - { id: 6, class: gpr32, preferred-register: '' }
+ - { id: 7, class: fpr64, preferred-register: '' }
+ - { id: 8, class: fpr128, preferred-register: '' }
+ - { id: 9, class: fpr128, preferred-register: '' }
+ - { id: 10, class: fpr128, preferred-register: '' }
+ - { id: 11, class: fpr64, preferred-register: '' }
+ - { id: 12, class: fpr64, preferred-register: '' }
+ - { id: 13, class: fpr64, preferred-register: '' }
+ - { id: 14, class: fpr64, preferred-register: '' }
+ - { id: 15, class: fpr128, preferred-register: '' }
+ - { id: 16, class: fpr128, preferred-register: '' }
+ - { id: 17, class: fpr128, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: insert_vec_v16i8_uaddlv_from_v4i32
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+ ; CHECK-NEXT: [[UADDLVv4i32v:%[0-9]+]]:fpr64 = UADDLVv4i32v [[MOVIv2d_ns]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[UADDLVv4i32v]], %subreg.dsub
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64all = COPY [[INSERT_SUBREG]].dsub
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]].sub_32
+ ; CHECK-NEXT: [[MOVID:%[0-9]+]]:fpr64 = MOVID 0
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], killed [[MOVID]], %subreg.dsub
+ ; CHECK-NEXT: [[INSvi8gpr:%[0-9]+]]:fpr128 = INSvi8gpr [[INSERT_SUBREG1]], 0, killed [[COPY2]]
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[INSvi8gpr]].dsub
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:fpr64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[ZIP1v8i8_:%[0-9]+]]:fpr64 = ZIP1v8i8 killed [[COPY3]], killed [[DEF2]]
+ ; CHECK-NEXT: [[BICv4i16_:%[0-9]+]]:fpr64 = BICv4i16 [[ZIP1v8i8_]], 255, 8
+ ; CHECK-NEXT: [[USHLLv4i16_shift:%[0-9]+]]:fpr128 = USHLLv4i16_shift killed [[BICv4i16_]], 0
+ ; CHECK-NEXT: [[UCVTFv4f32_:%[0-9]+]]:fpr128 = nofpexcept UCVTFv4f32 killed [[USHLLv4i16_shift]], implicit $fpcr
+ ; CHECK-NEXT: STRQui [[MOVIv2d_ns]], [[COPY]], 3 :: (store (s128) into %ir.0 + 48, align 8)
+ ; CHECK-NEXT: STRQui [[MOVIv2d_ns]], [[COPY]], 2 :: (store (s128) into %ir.0 + 32, align 8)
+ ; CHECK-NEXT: STRQui [[MOVIv2d_ns]], [[COPY]], 1 :: (store (s128) into %ir.0 + 16, align 8)
+ ; CHECK-NEXT: STRQui killed [[UCVTFv4f32_]], [[COPY]], 0 :: (store (s128) into %ir.0, align 8)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64common = COPY $x0
+ %1:fpr128 = MOVIv2d_ns 0
+ %2:fpr64 = UADDLVv4i32v %1
+ %4:fpr128 = IMPLICIT_DEF
+ %3:fpr128 = INSERT_SUBREG %4, killed %2, %subreg.dsub
+ %5:gpr64all = COPY %3.dsub
+ %6:gpr32 = COPY %5.sub_32
+ %7:fpr64 = MOVID 0
+ %9:fpr128 = IMPLICIT_DEF
+ %8:fpr128 = INSERT_SUBREG %9, killed %7, %subreg.dsub
+ %10:fpr128 = INSvi8gpr %8, 0, killed %6
+ %11:fpr64 = COPY %10.dsub
+ %13:fpr64 = IMPLICIT_DEF
+ %12:fpr64 = ZIP1v8i8 killed %11, killed %13
+ %14:fpr64 = BICv4i16 %12, 255, 8
+ %15:fpr128 = USHLLv4i16_shift killed %14, 0
+ %16:fpr128 = nofpexcept UCVTFv4f32 killed %15, implicit $fpcr
+ STRQui %1, %0, 3 :: (store (s128) into %ir.0 + 48, align 8)
+ STRQui %1, %0, 2 :: (store (s128) into %ir.0 + 32, align 8)
+ STRQui %1, %0, 1 :: (store (s128) into %ir.0 + 16, align 8)
+ STRQui killed %16, %0, 0 :: (store (s128) into %ir.0, align 8)
+ RET_ReallyLR
+
+...
+---
+name: insert_vec_v2i32_uaddlv_from_v8i16_nz_index
+registers:
+ - { id: 0, class: gpr64common, preferred-register: '' }
+ - { id: 1, class: fpr128, preferred-register: '' }
+ - { id: 2, class: fpr32, preferred-register: '' }
+ - { id: 3, class: fpr128, preferred-register: '' }
+ - { id: 4, class: fpr128, preferred-register: '' }
+ - { id: 5, class: gpr32, preferred-register: '' }
+ - { id: 6, class: fpr128, preferred-register: '' }
+ - { id: 7, class: fpr128, preferred-register: '' }
+ - { id: 8, class: fpr128, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: insert_vec_v2i32_uaddlv_from_v8i16_nz_index
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[MOVIv2d_ns:%[0-9]+]]:fpr128 = MOVIv2d_ns 0
+ ; CHECK-NEXT: [[UADDLVv8i16v:%[0-9]+]]:fpr32 = UADDLVv8i16v [[MOVIv2d_ns]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], killed [[UADDLVv8i16v]], %subreg.ssub
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].ssub
+ ; CHECK-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[MOVIv2d_ns]], 2, killed [[COPY1]]
+ ; CHECK-NEXT: [[UCVTFv4f32_:%[0-9]+]]:fpr128 = nofpexcept UCVTFv4f32 killed [[INSvi32gpr]], implicit $fpcr
+ ; CHECK-NEXT: STRQui killed [[UCVTFv4f32_]], [[COPY]], 0 :: (store (s128) into %ir.0, align 8)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64common = COPY $x0
+ %1:fpr128 = MOVIv2d_ns 0
+ %2:fpr32 = UADDLVv8i16v %1
+ %4:fpr128 = IMPLICIT_DEF
+ %3:fpr128 = INSERT_SUBREG %4, killed %2, %subreg.ssub
+ %5:gpr32 = COPY %3.ssub
+ %7:fpr128 = INSvi32gpr %1, 2, killed %5
+ %8:fpr128 = nofpexcept UCVTFv4f32 killed %7, implicit $fpcr
+ STRQui killed %8, %0, 0 :: (store (s128) into %ir.0, align 8)
+ RET_ReallyLR
+
+...
+---
+name: insert_vec_from_gpr
+registers:
+ - { id: 0, class: gpr32, preferred-register: '' }
+ - { id: 1, class: gpr64common, preferred-register: '' }
+ - { id: 2, class: gpr64, preferred-register: '' }
+ - { id: 3, class: gpr64all, preferred-register: '' }
+ - { id: 4, class: gpr64common, preferred-register: '' }
+ - { id: 5, class: gpr64common, preferred-register: '' }
+ - { id: 6, class: fpr128, preferred-register: '' }
+ - { id: 7, class: fpr128, preferred-register: '' }
+liveins:
+ - { reg: '$w0', virtual-reg: '%0' }
+ - { reg: '$x1', virtual-reg: '%1' }
+stack:
+ - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 16,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: -16, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+body: |
+ bb.0.entry:
+ liveins: $w0, $x1
+
+ ; CHECK-LABEL: name: insert_vec_from_gpr
+ ; CHECK: liveins: $w0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[COPY1]], %subreg.sub_32
+ ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0, 0
+ ; CHECK-NEXT: [[BFMXri:%[0-9]+]]:gpr64common = BFMXri [[ADDXri]], killed [[INSERT_SUBREG]], 62, 1
+ ; CHECK-NEXT: STRWui [[COPY1]], killed [[BFMXri]], 0 :: (store (s32))
+ ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+ ; CHECK-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[LDRQui]], 1, [[COPY1]]
+ ; CHECK-NEXT: STRQui killed [[INSvi32gpr]], [[COPY]], 0 :: (store (s128) into %ir.p, align 4)
+ ; CHECK-NEXT: RET_ReallyLR
+ %1:gpr64common = COPY $x1
+ %0:gpr32 = COPY $w0
+ %3:gpr64all = IMPLICIT_DEF
+ %2:gpr64 = INSERT_SUBREG %3, %0, %subreg.sub_32
+ %4:gpr64common = ADDXri %stack.0, 0, 0
+ %5:gpr64common = BFMXri %4, killed %2, 62, 1
+ STRWui %0, killed %5, 0 :: (store (s32))
+ %6:fpr128 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+ %7:fpr128 = INSvi32gpr %6, 1, %0
+ STRQui killed %7, %1, 0 :: (store (s128) into %ir.p, align 4)
+ RET_ReallyLR
+
+...
+