diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5177,6 +5177,16 @@
             (i64 0)),
           dsub)>;
 
+// Inserting FP +0.0 into a vector lane: move from the GPR zero register
+// directly (INS Vd.<T>[i], WZR/XZR) instead of materialising 0.0 in an FP
+// register first. Only valid for +0.0 — -0.0 has a sign bit set, so it must
+// not match fpimm0 (see test_insert_v4f16_f16_negzero below).
+def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
+            (i64 VectorIndexH:$imm)),
+          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
+def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
+            (i64 VectorIndexS:$imm)),
+          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
+def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
+            (i64 VectorIndexD:$imm)),
+          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;
+
 def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
             (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
           (INSvi16lane
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll b/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
@@ -93,6 +93,7 @@
   %vecinit3 = insertelement <4 x float> %0, float %a, i32 2
   ret <4 x float> %vecinit3
 }
+
 define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
 ; CHECK-LABEL: test_insert_v8i16_i16_zero:
 ; CHECK: bb.0:
@@ -120,11 +121,27 @@
   ret <4 x half> %vecinit5
 }
 
+; We cannot use wzr for negative zero.
+define <4 x half> @test_insert_v4f16_f16_negzero(<4 x half> %a) {
+; CHECK-LABEL: test_insert_v4f16_f16_negzero:
+; CHECK: bb.0:
+; CHECK-NEXT: adrp x8, .LCPI8_0
+; CHECK-NEXT: // kill
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI8_0
+; CHECK-NEXT: ld1.h { v0 }[0], [x8]
+; CHECK-NEXT: // kill
+; CHECK-NEXT: ret
+
+entry:
+  %vecinit5 = insertelement <4 x half> %a, half -0.000000e+00, i32 0
+  ret <4 x half> %vecinit5
+}
+
 define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
 ; CHECK-LABEL: test_insert_v8f16_f16_zero:
 ; CHECK: bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: add x8, x8, :lo12:.LCPI8_0
+; CHECK-NEXT: adrp x8, .LCPI9_0
+; CHECK-NEXT: add x8, x8, :lo12:.LCPI9_0
 ; CHECK-NEXT: ld1.h { v0 }[6], [x8]
 ; CHECK-NEXT: ret
 
@@ -137,8 +154,7 @@
 ; CHECK-LABEL: test_insert_v2f32_f32_zero:
 ; CHECK: bb.0:
 ; CHECK-NEXT: // kill
-; CHECK-NEXT: fmov s1, wzr
-; CHECK-NEXT: mov.s v0[0], v1[0]
+; CHECK-NEXT: mov.s v0[0], wzr
 ; CHECK-NEXT: // kill
 ; CHECK-NEXT: ret
 
@@ -150,8 +166,7 @@
 define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
 ; CHECK-LABEL: test_insert_v4f32_f32_zero:
 ; CHECK: bb.0:
-; CHECK-NEXT: fmov s1, wzr
-; CHECK-NEXT: mov.s v0[3], v1[0]
+; CHECK-NEXT: mov.s v0[3], wzr
 ; CHECK-NEXT: ret
 
 entry:
@@ -162,8 +177,7 @@
 define <2 x double> @test_insert_v2f64_f64_zero(<2 x double> %a) {
 ; CHECK-LABEL: test_insert_v2f64_f64_zero:
 ; CHECK: bb.0:
-; CHECK-NEXT: fmov d1, xzr
-; CHECK-NEXT: mov.d v0[1], v1[0]
+; CHECK-NEXT: mov.d v0[1], xzr
 ; CHECK-NEXT: ret
 
 entry:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
@@ -47,8 +47,7 @@
 define float @test_v3f32(<3 x float> %a) nounwind {
 ; CHECK-LABEL: test_v3f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s1, wzr
-; CHECK-NEXT: mov v0.s[3], v1.s[0]
+; CHECK-NEXT: mov v0.s[3], wzr
 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT: faddp s0, v0.2s