Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5288,6 +5288,26 @@ defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>; defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>; +def : Pat<(v4i16 (concat_vectors (v2i16 (trunc (v2i32 V64:$Vn))), + (v2i16 (trunc (v2i32 V64:$Vm))))), + (UZP1v4i16 V64:$Vn, V64:$Vm)>; + +def : Pat<(v8i8 (concat_vectors (v4i8 (trunc (v4i16 V64:$Vn))), + (v4i8 (trunc (v4i16 V64:$Vm))))), + (UZP1v8i8 V64:$Vn, V64:$Vm)>; + +def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))), + (v2i32 (trunc (v2i64 V128:$Vm))))), + (UZP1v4i32 V128:$Vn, V128:$Vm)>; + +def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))), + (v4i16 (trunc (v4i32 V128:$Vm))))), + (UZP1v8i16 V128:$Vn, V128:$Vm)>; + +def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))), + (v8i8 (trunc (v8i16 V128:$Vm))))), + (UZP1v16i8 V128:$Vn, V128:$Vm)>; + //---------------------------------------------------------------------------- // AdvSIMD TBL/TBX instructions //---------------------------------------------------------------------------- Index: llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll +++ llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -57,9 +57,8 @@ define <4 x i16> @trunc_v4i64_to_v4i16(<4 x i64>* %ptr) { ; CHECK-LABEL: trunc_v4i64_to_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: xtn2 v0.4s, v1.2d +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %tmp1 = load <4 x i64>, <4 x i64>* %ptr Index: llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll =================================================================== --- 
llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll +++ llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll @@ -4,6 +4,28 @@ ; Test the (concat_vectors (trunc), (trunc)) pattern. +define <4 x i16> @test_concat_truncate_v2i32_to_v4i16(<2 x i32> %a, <2 x i32> %b) #0 { +entry: +; CHECK-LABEL: test_concat_truncate_v2i32_to_v4i16: +; CHECK-NEXT: uzp1.4h v0, v0, v1 +; CHECK-NEXT: ret + %at = trunc <2 x i32> %a to <2 x i16> + %bt = trunc <2 x i32> %b to <2 x i16> + %shuffle = shufflevector <2 x i16> %at, <2 x i16> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i16> %shuffle +} + +define <8 x i8> @test_concat_truncate_v4i16_to_v8i8(<4 x i16> %a, <4 x i16> %b) #0 { +entry: +; CHECK-LABEL: test_concat_truncate_v4i16_to_v8i8: +; CHECK-NEXT: uzp1.8b v0, v0, v1 +; CHECK-NEXT: ret + %at = trunc <4 x i16> %a to <4 x i8> + %bt = trunc <4 x i16> %b to <4 x i8> + %shuffle = shufflevector <4 x i8> %at, <4 x i8> %bt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i8> %shuffle +} + define <4 x i16> @test_concat_truncate_v2i64_to_v4i16(<2 x i64> %a, <2 x i64> %b) #0 { entry: ; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i16: @@ -16,6 +38,17 @@ ret <4 x i16> %shuffle } +define <4 x i32> @test_concat_truncate_v2i64_to_v4i32(<2 x i64> %a, <2 x i64> %b) #0 { +entry: +; CHECK-LABEL: test_concat_truncate_v2i64_to_v4i32: +; CHECK-NEXT: uzp1.4s v0, v0, v1 +; CHECK-NEXT: ret + %at = trunc <2 x i64> %a to <2 x i32> + %bt = trunc <2 x i64> %b to <2 x i32> + %shuffle = shufflevector <2 x i32> %at, <2 x i32> %bt, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %shuffle +} + define <8 x i8> @test_concat_truncate_v4i32_to_v8i8(<4 x i32> %a, <4 x i32> %b) #0 { entry: ; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i8: @@ -31,8 +64,7 @@ define <8 x i16> @test_concat_truncate_v4i32_to_v8i16(<4 x i32> %a, <4 x i32> %b) #0 { entry: ; CHECK-LABEL: test_concat_truncate_v4i32_to_v8i16: -; CHECK-NEXT: xtn.4h v0, v0 -; CHECK-NEXT: xtn2.8h v0, v1 +; CHECK-NEXT: uzp1.8h v0, v0, v1 ; CHECK-NEXT: ret %at = trunc <4 x i32> %a to <4 x i16> %bt = trunc <4 x i32> %b to 
<4 x i16> @@ -40,6 +72,16 @@ ret <8 x i16> %shuffle } +define <16 x i8> @test_concat_truncate_v8i16_to_v16i8(<8 x i16> %a, <8 x i16> %b) #0 { +entry: +; CHECK-LABEL: test_concat_truncate_v8i16_to_v16i8: +; CHECK-NEXT: uzp1.16b v0, v0, v1 +; CHECK-NEXT: ret + %at = trunc <8 x i16> %a to <8 x i8> + %bt = trunc <8 x i16> %b to <8 x i8> + %shuffle = shufflevector <8 x i8> %at, <8 x i8> %bt, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + ret <16 x i8> %shuffle +} ; The concat_vectors operation in this test is introduced when splitting ; the fptrunc operation due to the split input operand. Index: llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll =================================================================== --- llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll +++ llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll @@ -14,8 +14,7 @@ ; CHECK-NEXT: dup v1.4s, w0 ; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: mov v1.d[1], v0.d[0] -; CHECK-NEXT: xtn v0.8b, v1.8h -; CHECK-NEXT: xtn2 v0.16b, v1.8h +; CHECK-NEXT: uzp1 v0.16b, v1.16b, v1.16b ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret Index: llvm/test/CodeGen/AArch64/fcvt_combine.ll =================================================================== --- llvm/test/CodeGen/AArch64/fcvt_combine.ll +++ llvm/test/CodeGen/AArch64/fcvt_combine.ll @@ -180,50 +180,49 @@ define <8 x i16> @test_v8f16(<8 x half> %in) { ; CHECK-NO16-LABEL: test_v8f16: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov h2, v0.h[1] -; CHECK-NO16-NEXT: mov h3, v0.h[4] -; CHECK-NO16-NEXT: mov h4, v0.h[5] +; CHECK-NO16-NEXT: mov h2, v0.h[4] +; CHECK-NO16-NEXT: mov h3, v0.h[5] +; CHECK-NO16-NEXT: mov h4, v0.h[1] ; CHECK-NO16-NEXT: mov h5, v0.h[2] -; CHECK-NO16-NEXT: fcvt s6, h0 +; CHECK-NO16-NEXT: mov h6, v0.h[6] +; CHECK-NO16-NEXT: fcvt s7, h0 ; CHECK-NO16-NEXT: fmov s1, #4.00000000 -; CHECK-NO16-NEXT: mov h7, v0.h[6] ; CHECK-NO16-NEXT: mov h16, v0.h[3] ; CHECK-NO16-NEXT: fcvt s2, h2 ; CHECK-NO16-NEXT: fcvt s3, h3 ; CHECK-NO16-NEXT: fcvt s4, h4 -; CHECK-NO16-NEXT: fcvt s5, h5 
-; CHECK-NO16-NEXT: fmul s6, s6, s1 ; CHECK-NO16-NEXT: mov h0, v0.h[7] -; CHECK-NO16-NEXT: fcvt s7, h7 +; CHECK-NO16-NEXT: fcvt s5, h5 +; CHECK-NO16-NEXT: fcvt s6, h6 +; CHECK-NO16-NEXT: fmul s7, s7, s1 ; CHECK-NO16-NEXT: fcvt s16, h16 ; CHECK-NO16-NEXT: fmul s2, s2, s1 ; CHECK-NO16-NEXT: fmul s3, s3, s1 ; CHECK-NO16-NEXT: fmul s4, s4, s1 -; CHECK-NO16-NEXT: fmul s5, s5, s1 -; CHECK-NO16-NEXT: fcvt h6, s6 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s7, s7, s1 +; CHECK-NO16-NEXT: fmul s5, s5, s1 +; CHECK-NO16-NEXT: fmul s6, s6, s1 +; CHECK-NO16-NEXT: fcvt h7, s7 ; CHECK-NO16-NEXT: fmul s16, s16, s1 ; CHECK-NO16-NEXT: fcvt h2, s2 ; CHECK-NO16-NEXT: fcvt h3, s3 ; CHECK-NO16-NEXT: fcvt h4, s4 -; CHECK-NO16-NEXT: fcvt h5, s5 ; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h1, s7 -; CHECK-NO16-NEXT: mov v6.h[1], v2.h[0] -; CHECK-NO16-NEXT: fcvt h2, s16 -; CHECK-NO16-NEXT: mov v3.h[1], v4.h[0] +; CHECK-NO16-NEXT: fcvt h1, s5 +; CHECK-NO16-NEXT: fcvt h5, s6 +; CHECK-NO16-NEXT: mov v2.h[1], v3.h[0] +; CHECK-NO16-NEXT: fcvt h3, s16 +; CHECK-NO16-NEXT: mov v7.h[1], v4.h[0] ; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: mov v6.h[2], v5.h[0] -; CHECK-NO16-NEXT: mov v3.h[2], v1.h[0] -; CHECK-NO16-NEXT: mov v6.h[3], v2.h[0] -; CHECK-NO16-NEXT: mov v3.h[3], v0.h[0] -; CHECK-NO16-NEXT: fcvtl v0.4s, v6.4h -; CHECK-NO16-NEXT: fcvtl v1.4s, v3.4h +; CHECK-NO16-NEXT: mov v2.h[2], v5.h[0] +; CHECK-NO16-NEXT: mov v7.h[2], v1.h[0] +; CHECK-NO16-NEXT: mov v2.h[3], v0.h[0] +; CHECK-NO16-NEXT: mov v7.h[3], v3.h[0] +; CHECK-NO16-NEXT: fcvtl v0.4s, v2.4h +; CHECK-NO16-NEXT: fcvtl v1.4s, v7.4h ; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-NO16-NEXT: xtn v0.4h, v0.4s -; CHECK-NO16-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-NO16-NEXT: ret ; ; CHECK-FP16-LABEL: test_v8f16: @@ -547,31 +546,30 @@ ; CHECK-NO16-NEXT: cmp w14, w9 ; CHECK-NO16-NEXT: csel w14, w14, w9, lt ; CHECK-NO16-NEXT: cmn w14, #8, 
lsl #12 // =32768 -; CHECK-NO16-NEXT: fcvtzs w16, s0 ; CHECK-NO16-NEXT: csel w14, w14, w10, gt ; CHECK-NO16-NEXT: cmp w15, w9 ; CHECK-NO16-NEXT: csel w15, w15, w9, lt -; CHECK-NO16-NEXT: mov s0, v1.s[3] ; CHECK-NO16-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w15, w15, w10, gt -; CHECK-NO16-NEXT: cmp w16, w9 -; CHECK-NO16-NEXT: csel w11, w16, w9, lt -; CHECK-NO16-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fmov s1, w15 +; CHECK-NO16-NEXT: csel w11, w15, w10, gt ; CHECK-NO16-NEXT: fcvtzs w15, s0 -; CHECK-NO16-NEXT: csel w11, w11, w10, gt +; CHECK-NO16-NEXT: mov s0, v1.s[3] ; CHECK-NO16-NEXT: mov v2.s[1], w8 -; CHECK-NO16-NEXT: mov v1.s[1], w14 +; CHECK-NO16-NEXT: fmov s1, w11 ; CHECK-NO16-NEXT: cmp w15, w9 ; CHECK-NO16-NEXT: csel w8, w15, w9, lt +; CHECK-NO16-NEXT: fcvtzs w11, s0 ; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: mov v1.s[1], w14 ; CHECK-NO16-NEXT: csel w8, w8, w10, gt -; CHECK-NO16-NEXT: mov v1.s[2], w11 ; CHECK-NO16-NEXT: mov v2.s[2], w12 -; CHECK-NO16-NEXT: mov v1.s[3], w8 +; CHECK-NO16-NEXT: cmp w11, w9 +; CHECK-NO16-NEXT: csel w9, w11, w9, lt +; CHECK-NO16-NEXT: mov v1.s[2], w8 +; CHECK-NO16-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: csel w8, w9, w10, gt ; CHECK-NO16-NEXT: mov v2.s[3], w13 -; CHECK-NO16-NEXT: xtn v0.4h, v1.4s -; CHECK-NO16-NEXT: xtn2 v0.8h, v2.4s +; CHECK-NO16-NEXT: mov v1.s[3], w8 +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v2.8h ; CHECK-NO16-NEXT: ret ; ; CHECK-FP16-LABEL: test_v8f16_sat: Index: llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -661,13 +661,12 @@ define <8 x i8> @fptosi_i8(<8 x half> %a) #0 { ; CHECK-CVT-LABEL: fptosi_i8: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: 
fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: xtn v1.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v1.8h, v0.4s -; CHECK-CVT-NEXT: xtn v0.8b, v1.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: fptosi_i8: @@ -689,12 +688,11 @@ ; CHECK-COMMON_NEXT: ret ; CHECK-CVT-LABEL: fptosi_i16: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: fptosi_i16: @@ -708,13 +706,12 @@ define <8 x i8> @fptoui_i8(<8 x half> %a) #0 { ; CHECK-CVT-LABEL: fptoui_i8: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: xtn v1.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v1.8h, v0.4s -; CHECK-CVT-NEXT: xtn v0.8b, v1.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: fptoui_i8: @@ -729,12 +726,11 @@ define <8 x i16> @fptoui_i16(<8 x half> %a) #0 { ; CHECK-CVT-LABEL: fptoui_i16: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-CVT-NEXT: fcvtzu v2.4s, v0.4s -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; 
CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: fptoui_i16: Index: llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll =================================================================== --- llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -80,8 +80,8 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-LABEL: utest_f32i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl v2.2d, v0.2s -; CHECK-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-NEXT: fcvtl v0.2d, v0.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: fcvtzu v2.2d, v2.2d ; CHECK-NEXT: fcvtzu v0.2d, v0.2d @@ -90,9 +90,8 @@ ; CHECK-NEXT: and v2.16b, v2.16b, v3.16b ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: orn v2.16b, v2.16b, v3.16b -; CHECK-NEXT: orn v1.16b, v0.16b, v1.16b -; CHECK-NEXT: xtn v0.2s, v2.2d -; CHECK-NEXT: xtn2 v0.4s, v1.2d +; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret entry: %conv = fptoui <4 x float> %x to <4 x i64> @@ -138,54 +137,52 @@ ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-NEXT: mov h2, v0.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[1] +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzu x9, s2 -; CHECK-CVT-NEXT: fmov d2, x8 +; CHECK-CVT-NEXT: fcvtzu x9, s0 +; CHECK-CVT-NEXT: fcvtzu x8, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fmov d0, x8 ; CHECK-CVT-NEXT: fcvtzu x8, s3 ; CHECK-CVT-NEXT: fmov d3, x9 -; CHECK-CVT-NEXT: fcvtzu x9, s0 -; CHECK-CVT-NEXT: mov v2.d[1], x8 +; CHECK-CVT-NEXT: fcvtzu x9, s2 +; CHECK-CVT-NEXT: mov v0.d[1], x8 ; CHECK-CVT-NEXT: mov v3.d[1], x9 -; 
CHECK-CVT-NEXT: cmhi v0.2d, v1.2d, v2.2d +; CHECK-CVT-NEXT: cmhi v2.2d, v1.2d, v0.2d ; CHECK-CVT-NEXT: cmhi v1.2d, v1.2d, v3.2d -; CHECK-CVT-NEXT: and v2.16b, v2.16b, v0.16b +; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-CVT-NEXT: and v3.16b, v3.16b, v1.16b -; CHECK-CVT-NEXT: orn v0.16b, v2.16b, v0.16b +; CHECK-CVT-NEXT: orn v0.16b, v0.16b, v2.16b ; CHECK-CVT-NEXT: orn v1.16b, v3.16b, v1.16b -; CHECK-CVT-NEXT: xtn v0.2s, v0.2d -; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d +; CHECK-CVT-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: utesth_f16i32: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fcvtzu x8, h0 -; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: mov h3, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu x9, h0 ; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-NEXT: fcvtzu x9, h2 -; CHECK-FP16-NEXT: fmov d2, x8 +; CHECK-FP16-NEXT: fcvtzu x8, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[1] +; CHECK-FP16-NEXT: fmov d0, x8 ; CHECK-FP16-NEXT: fcvtzu x8, h3 ; CHECK-FP16-NEXT: fmov d3, x9 -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov v2.d[1], x8 +; CHECK-FP16-NEXT: fcvtzu x9, h2 +; CHECK-FP16-NEXT: mov v0.d[1], x8 ; CHECK-FP16-NEXT: mov v3.d[1], x9 -; CHECK-FP16-NEXT: cmhi v0.2d, v1.2d, v2.2d +; CHECK-FP16-NEXT: cmhi v2.2d, v1.2d, v0.2d ; CHECK-FP16-NEXT: cmhi v1.2d, v1.2d, v3.2d -; CHECK-FP16-NEXT: and v2.16b, v2.16b, v0.16b +; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-FP16-NEXT: and v3.16b, v3.16b, v1.16b -; CHECK-FP16-NEXT: orn v0.16b, v2.16b, v0.16b +; CHECK-FP16-NEXT: orn v0.16b, v0.16b, v2.16b ; CHECK-FP16-NEXT: orn v1.16b, v3.16b, v1.16b -; CHECK-FP16-NEXT: xtn v0.2s, v0.2d -; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d +; CHECK-FP16-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -324,10 +321,11 @@ ; CHECK-CVT-NEXT: mvni v3.4s, #127, 
msl #8 ; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: smin v1.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: sqxtn v0.4h, v0.4s -; CHECK-CVT-NEXT: smax v1.4s, v1.4s, v3.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s +; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s +; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i16: @@ -352,9 +350,9 @@ ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: fcvtzu v2.4s, v2.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s -; CHECK-NEXT: uqxtn v0.4h, v0.4s -; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h ; CHECK-NEXT: ret entry: %conv = fptoui <8 x half> %x to <8 x i32> @@ -372,9 +370,9 @@ ; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s ; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: umin v1.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: uqxtn v0.4h, v0.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: ustest_f16i16: @@ -813,8 +811,8 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: utest_f32i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl v2.2d, v0.2s -; CHECK-NEXT: fcvtl2 v0.2d, v0.4s +; CHECK-NEXT: fcvtl2 v2.2d, v0.4s +; CHECK-NEXT: fcvtl v0.2d, v0.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: fcvtzu v2.2d, v2.2d ; CHECK-NEXT: fcvtzu v0.2d, v0.2d @@ -823,9 +821,8 @@ ; CHECK-NEXT: and v2.16b, v2.16b, v3.16b ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: orn v2.16b, v2.16b, v3.16b -; CHECK-NEXT: orn v1.16b, v0.16b, v1.16b -; CHECK-NEXT: xtn v0.2s, 
v2.2d -; CHECK-NEXT: xtn2 v0.4s, v1.2d +; CHECK-NEXT: orn v0.16b, v0.16b, v1.16b +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret entry: %conv = fptoui <4 x float> %x to <4 x i64> @@ -866,54 +863,52 @@ ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-NEXT: mov h2, v0.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[1] +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzu x9, s2 -; CHECK-CVT-NEXT: fmov d2, x8 +; CHECK-CVT-NEXT: fcvtzu x9, s0 +; CHECK-CVT-NEXT: fcvtzu x8, s2 +; CHECK-CVT-NEXT: fcvt s2, h4 +; CHECK-CVT-NEXT: fmov d0, x8 ; CHECK-CVT-NEXT: fcvtzu x8, s3 ; CHECK-CVT-NEXT: fmov d3, x9 -; CHECK-CVT-NEXT: fcvtzu x9, s0 -; CHECK-CVT-NEXT: mov v2.d[1], x8 +; CHECK-CVT-NEXT: fcvtzu x9, s2 +; CHECK-CVT-NEXT: mov v0.d[1], x8 ; CHECK-CVT-NEXT: mov v3.d[1], x9 -; CHECK-CVT-NEXT: cmhi v0.2d, v1.2d, v2.2d +; CHECK-CVT-NEXT: cmhi v2.2d, v1.2d, v0.2d ; CHECK-CVT-NEXT: cmhi v1.2d, v1.2d, v3.2d -; CHECK-CVT-NEXT: and v2.16b, v2.16b, v0.16b +; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-CVT-NEXT: and v3.16b, v3.16b, v1.16b -; CHECK-CVT-NEXT: orn v0.16b, v2.16b, v0.16b +; CHECK-CVT-NEXT: orn v0.16b, v0.16b, v2.16b ; CHECK-CVT-NEXT: orn v1.16b, v3.16b, v1.16b -; CHECK-CVT-NEXT: xtn v0.2s, v0.2d -; CHECK-CVT-NEXT: xtn2 v0.4s, v1.2d +; CHECK-CVT-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: utesth_f16i32_mm: ; CHECK-FP16: // %bb.0: // %entry ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fcvtzu x8, h0 -; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: mov h3, v0.h[3] +; CHECK-FP16-NEXT: 
fcvtzu x9, h0 ; CHECK-FP16-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-FP16-NEXT: fcvtzu x9, h2 -; CHECK-FP16-NEXT: fmov d2, x8 +; CHECK-FP16-NEXT: fcvtzu x8, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[1] +; CHECK-FP16-NEXT: fmov d0, x8 ; CHECK-FP16-NEXT: fcvtzu x8, h3 ; CHECK-FP16-NEXT: fmov d3, x9 -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov v2.d[1], x8 +; CHECK-FP16-NEXT: fcvtzu x9, h2 +; CHECK-FP16-NEXT: mov v0.d[1], x8 ; CHECK-FP16-NEXT: mov v3.d[1], x9 -; CHECK-FP16-NEXT: cmhi v0.2d, v1.2d, v2.2d +; CHECK-FP16-NEXT: cmhi v2.2d, v1.2d, v0.2d ; CHECK-FP16-NEXT: cmhi v1.2d, v1.2d, v3.2d -; CHECK-FP16-NEXT: and v2.16b, v2.16b, v0.16b +; CHECK-FP16-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-FP16-NEXT: and v3.16b, v3.16b, v1.16b -; CHECK-FP16-NEXT: orn v0.16b, v2.16b, v0.16b +; CHECK-FP16-NEXT: orn v0.16b, v0.16b, v2.16b ; CHECK-FP16-NEXT: orn v1.16b, v3.16b, v1.16b -; CHECK-FP16-NEXT: xtn v0.2s, v0.2d -; CHECK-FP16-NEXT: xtn2 v0.4s, v1.2d +; CHECK-FP16-NEXT: uzp1 v0.4s, v1.4s, v0.4s ; CHECK-FP16-NEXT: ret entry: %conv = fptoui <4 x half> %x to <4 x i64> @@ -1039,10 +1034,11 @@ ; CHECK-CVT-NEXT: mvni v3.4s, #127, msl #8 ; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: smin v1.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: sqxtn v0.4h, v0.4s -; CHECK-CVT-NEXT: smax v1.4s, v1.4s, v3.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s +; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s +; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i16_mm: @@ -1065,9 +1061,9 @@ ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: fcvtzu v2.4s, v2.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s -; CHECK-NEXT: uqxtn v0.4h, v0.4s -; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-NEXT: 
uzp1 v0.8h, v0.8h, v2.8h ; CHECK-NEXT: ret entry: %conv = fptoui <8 x half> %x to <8 x i32> @@ -1084,9 +1080,9 @@ ; CHECK-CVT-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-CVT-NEXT: fcvtzu v2.4s, v2.4s ; CHECK-CVT-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-CVT-NEXT: umin v1.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: uqxtn v0.4h, v0.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: umin v2.4s, v2.4s, v1.4s +; CHECK-CVT-NEXT: umin v0.4s, v0.4s, v1.4s +; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: ustest_f16i16_mm: Index: llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -2093,9 +2093,8 @@ ; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge ; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csel w11, w11, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w14, s1 -; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge ; CHECK-CVT-NEXT: cmp w12, #0 ; CHECK-CVT-NEXT: csel w12, w12, wzr, lt @@ -2104,25 +2103,25 @@ ; CHECK-CVT-NEXT: cmp w13, #0 ; CHECK-CVT-NEXT: csel w13, w13, wzr, lt ; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge -; CHECK-CVT-NEXT: cmp w14, #0 -; CHECK-CVT-NEXT: csel w9, w14, wzr, lt -; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: fmov s1, w13 -; CHECK-CVT-NEXT: fcvtzs w13, s0 -; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge +; CHECK-CVT-NEXT: csinv w9, w13, wzr, ge +; CHECK-CVT-NEXT: fcvtzs w13, s1 ; CHECK-CVT-NEXT: mov v2.s[1], w8 -; CHECK-CVT-NEXT: mov v1.s[1], w12 +; CHECK-CVT-NEXT: fmov s1, w9 ; CHECK-CVT-NEXT: cmp w13, #0 ; CHECK-CVT-NEXT: csel w8, w13, wzr, lt +; CHECK-CVT-NEXT: fcvtzs w9, s0 ; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: mov v1.s[1], w12 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge -; CHECK-CVT-NEXT: mov v1.s[2], w9 +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csel 
w9, w9, wzr, lt ; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: mov v1.s[2], w8 +; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge ; CHECK-CVT-NEXT: mov v2.s[3], w11 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -2172,9 +2171,8 @@ ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #127 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s1 -; CHECK-CVT-NEXT: cmn w13, #128 ; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: cmn w13, #128 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #127 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt @@ -2183,25 +2181,25 @@ ; CHECK-CVT-NEXT: cmp w15, #127 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #128 -; CHECK-CVT-NEXT: csel w15, w15, w10, gt -; CHECK-CVT-NEXT: cmp w16, #127 -; CHECK-CVT-NEXT: csel w11, w16, w8, lt -; CHECK-CVT-NEXT: cmn w11, #128 -; CHECK-CVT-NEXT: fmov s1, w15 -; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: csel w11, w15, w10, gt +; CHECK-CVT-NEXT: fcvtzs w15, s1 ; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: cmp w15, #127 -; CHECK-CVT-NEXT: csel w8, w15, w8, lt +; CHECK-CVT-NEXT: csel w9, w15, w8, lt +; CHECK-CVT-NEXT: fcvtzs w11, s0 +; CHECK-CVT-NEXT: cmn w9, #128 +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: cmp w11, #127 +; CHECK-CVT-NEXT: csel w8, w11, w8, lt +; CHECK-CVT-NEXT: mov v2.s[2], w12 ; CHECK-CVT-NEXT: cmn w8, #128 +; CHECK-CVT-NEXT: mov v1.s[2], w9 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: mov v1.s[3], w8 ; CHECK-CVT-NEXT: mov v2.s[3], w13 -; 
CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -2247,9 +2245,8 @@ ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #4095 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s1 -; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #4095 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt @@ -2258,25 +2255,25 @@ ; CHECK-CVT-NEXT: cmp w15, #4095 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: csel w15, w15, w10, gt -; CHECK-CVT-NEXT: cmp w16, #4095 -; CHECK-CVT-NEXT: csel w11, w16, w8, lt -; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: fmov s1, w15 -; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: csel w11, w15, w10, gt +; CHECK-CVT-NEXT: fcvtzs w15, s1 ; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: cmp w15, #4095 -; CHECK-CVT-NEXT: csel w8, w15, w8, lt +; CHECK-CVT-NEXT: csel w9, w15, w8, lt +; CHECK-CVT-NEXT: fcvtzs w11, s0 +; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: cmp w11, #4095 +; CHECK-CVT-NEXT: csel w8, w11, w8, lt +; CHECK-CVT-NEXT: mov v2.s[2], w12 ; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov v1.s[2], w9 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: mov v1.s[3], w8 ; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: uzp1 
v0.8h, v1.8h, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i13: @@ -2324,9 +2321,8 @@ ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, w8 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s1 -; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, w8 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt @@ -2335,25 +2331,25 @@ ; CHECK-CVT-NEXT: cmp w15, w8 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: csel w15, w15, w10, gt -; CHECK-CVT-NEXT: cmp w16, w8 -; CHECK-CVT-NEXT: csel w11, w16, w8, lt -; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s1, w15 -; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: csel w11, w15, w10, gt +; CHECK-CVT-NEXT: fcvtzs w15, s1 ; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: csel w8, w15, w8, lt +; CHECK-CVT-NEXT: csel w9, w15, w8, lt +; CHECK-CVT-NEXT: fcvtzs w11, s0 +; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w8, w11, w8, lt +; CHECK-CVT-NEXT: mov v2.s[2], w12 ; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v1.s[2], w9 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: mov v1.s[3], w8 ; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16: Index: 
llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -1775,22 +1775,21 @@ ; CHECK-CVT-NEXT: csinc w13, w13, wzr, lo ; CHECK-CVT-NEXT: cmp w10, #1 ; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo -; CHECK-CVT-NEXT: fmov s2, w10 -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fmov s1, w9 -; CHECK-CVT-NEXT: mov v2.s[1], w13 -; CHECK-CVT-NEXT: cmp w10, #1 -; CHECK-CVT-NEXT: csinc w9, w10, wzr, lo -; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w8 -; CHECK-CVT-NEXT: mov v2.s[2], w9 -; CHECK-CVT-NEXT: cmp w10, #1 -; CHECK-CVT-NEXT: csinc w8, w10, wzr, lo -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w12 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: fmov s3, w10 +; CHECK-CVT-NEXT: mov v2.s[1], w8 +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo +; CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: mov v3.s[1], w13 +; CHECK-CVT-NEXT: mov v2.s[2], w11 +; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: mov v3.s[2], w8 +; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo +; CHECK-CVT-NEXT: mov v2.s[3], w12 +; CHECK-CVT-NEXT: mov v3.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -1835,22 +1834,21 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, #255 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 -; CHECK-CVT-NEXT: fmov s1, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: csel w10, w11, w8, lo -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: cmp w11, #255 -; 
CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v1.s[2], w12 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fmov s3, w11 +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov v3.s[1], w14 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v3.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: mov v3.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -1893,22 +1891,21 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 -; CHECK-CVT-NEXT: fmov s1, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w10, w11, w8, lo -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v1.s[2], w12 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fmov s3, w11 +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov v3.s[1], w14 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v3.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: mov v3.s[3], w8 +; 
CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13: @@ -1951,22 +1948,21 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 -; CHECK-CVT-NEXT: fmov s1, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w10, w11, w8, lo -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v1.s[2], w12 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fmov s3, w11 +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w9, w10, w8, lo +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: mov v3.s[1], w14 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w8, w10, w8, lo +; CHECK-CVT-NEXT: mov v3.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: mov v3.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16: Index: llvm/test/CodeGen/AArch64/neon-truncstore.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-truncstore.ll +++ llvm/test/CodeGen/AArch64/neon-truncstore.ll @@ -18,8 +18,7 @@ define void @v4i64_v4i32(<4 x i64> %a, <4 x i32>* %result) { ; CHECK-LABEL: v4i64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: xtn2 v0.4s, v1.2d +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret %b = trunc <4 x i64> %a to <4 x i32> @@ -30,10 +29,8 @@ define void @v8i64_v8i32(<8 x i64> %a, 
<8 x i32>* %result) { ; CHECK-LABEL: v8i64_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v2.2s, v2.2d -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: xtn2 v2.4s, v3.2d -; CHECK-NEXT: xtn2 v0.4s, v1.2d +; CHECK-NEXT: uzp1 v2.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret %b = trunc <8 x i64> %a to <8 x i32> @@ -69,8 +66,7 @@ define void @v8i32_v8i16(<8 x i32> %a, <8 x i16>* %result) { ; CHECK-LABEL: v8i32_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret %b = trunc <8 x i32> %a to <8 x i16> @@ -81,10 +77,8 @@ define void @v16i32_v16i16(<16 x i32> %a, <16 x i16>* %result) { ; CHECK-LABEL: v16i32_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn2 v2.8h, v3.4s -; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret %b = trunc <16 x i32> %a to <16 x i16> @@ -121,8 +115,7 @@ define void @v8i32_v8i8(<8 x i32> %a, <8 x i8>* %result) { ; CHECK-LABEL: v8i32_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret @@ -134,12 +127,9 @@ define void @v16i32_v16i8(<16 x i32> %a, <16 x i8>* %result) { ; CHECK-LABEL: v16i32_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: xtn2 v0.8h, v1.4s -; CHECK-NEXT: xtn2 v2.8h, v3.4s -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: xtn2 v0.16b, v2.8h +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret %b = trunc <16 x i32> %a to <16 x i8> @@ -150,18 +140,12 @@ define void 
@v32i32_v32i8(<32 x i32> %a, <32 x i8>* %result) { ; CHECK-LABEL: v32i32_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v4.4h, v4.4s -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn v6.4h, v6.4s -; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: xtn2 v4.8h, v5.4s -; CHECK-NEXT: xtn2 v0.8h, v1.4s -; CHECK-NEXT: xtn2 v6.8h, v7.4s -; CHECK-NEXT: xtn2 v2.8h, v3.4s -; CHECK-NEXT: xtn v1.8b, v4.8h -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: xtn2 v1.16b, v6.8h -; CHECK-NEXT: xtn2 v0.16b, v2.8h +; CHECK-NEXT: uzp1 v6.8h, v6.8h, v7.8h +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v3.8h, v4.8h, v5.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v1.16b, v3.16b, v6.16b +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %b = trunc <32 x i32> %a to <32 x i8> @@ -209,8 +193,7 @@ define void @v16i16_v16i8(<16 x i16> %a, <16 x i8>* %result) { ; CHECK-LABEL: v16i16_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: xtn2 v0.16b, v1.8h +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret %b = trunc <16 x i16> %a to <16 x i8> @@ -221,10 +204,8 @@ define void @v32i16_v32i8(<32 x i16> %a, <32 x i8>* %result) { ; CHECK-LABEL: v32i16_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v2.8b, v2.8h -; CHECK-NEXT: xtn v0.8b, v0.8h -; CHECK-NEXT: xtn2 v2.16b, v3.8h -; CHECK-NEXT: xtn2 v0.16b, v1.8h +; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; CHECK-NEXT: stp q0, q2, [x0] ; CHECK-NEXT: ret %b = trunc <32 x i16> %a to <32 x i8>