Index: lib/Target/AArch64/AArch64InstrNEON.td
===================================================================
--- lib/Target/AArch64/AArch64InstrNEON.td
+++ lib/Target/AArch64/AArch64InstrNEON.td
@@ -3222,6 +3222,38 @@
 def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
 def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
 
+// Vector loads whose address operand is a constant-pool entry: form the
+// address with ADRxi and feed it to the LD1 variant for that vector type.
+def : Pat<(v2f64 (load constpool:$label)),
+          (LD1_2D (ADRxi $label))>;
+def : Pat<(v2i64 (load constpool:$label)),
+          (LD1_2D (ADRxi $label))>;
+
+def : Pat<(v4f32 (load constpool:$label)),
+          (LD1_4S (ADRxi $label))>;
+def : Pat<(v4i32 (load constpool:$label)),
+          (LD1_4S (ADRxi $label))>;
+
+def : Pat<(v8i16 (load constpool:$label)),
+          (LD1_8H (ADRxi $label))>;
+def : Pat<(v16i8 (load constpool:$label)),
+          (LD1_16B (ADRxi $label))>;
+
+def : Pat<(v1f64 (load constpool:$label)),
+          (LD1_1D (ADRxi $label))>;
+def : Pat<(v1i64 (load constpool:$label)),
+          (LD1_1D (ADRxi $label))>;
+
+def : Pat<(v2i32 (load constpool:$label)),
+          (LD1_2S (ADRxi $label))>;
+def : Pat<(v2f32 (load constpool:$label)),
+          (LD1_2S (ADRxi $label))>;
+
+def : Pat<(v4i16 (load constpool:$label)),
+          (LD1_4H (ADRxi $label))>;
+def : Pat<(v8i8 (load constpool:$label)),
+          (LD1_8B (ADRxi $label))>;
+
 def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
           (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
 def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
Index: test/CodeGen/AArch64/neon-simd-ldst-one.ll
===================================================================
--- test/CodeGen/AArch64/neon-simd-ldst-one.ll
+++ test/CodeGen/AArch64/neon-simd-ldst-one.ll
@@ -2033,6 +2033,91 @@
   ret void
 }
 
+; Each test below adds a constant vector to its argument and checks that the
+; constant is fetched with an ADR + LD1 pair.
+; NOTE(review): the constant operands of the add/fadd instructions were missing
+; from this patch and have been filled in -- confirm the intended values.
+define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) {
+; CHECK-LABEL: test_ld_from_poll_v16i8
+; CHECK: adr {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK-NEXT: ld1 {{{v[0-9]+}}.16b}, [{{x[0-9]+}}]
+entry:
+  %b = add <16 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>
+  ret <16 x i8> %b
+}
+
+define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) {
+; CHECK-LABEL: test_ld_from_poll_v8i16
+; CHECK: adr {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK-NEXT: ld1 {{{v[0-9]+}}.8h}, [{{x[0-9]+}}]
+entry:
+  %b = add <8 x i16> %a, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
+  ret <8 x i16> %b
+}
+
+define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) {
+; CHECK-LABEL: test_ld_from_poll_v4i32
+; CHECK: adr {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK-NEXT: ld1 {{{v[0-9]+}}.4s}, [{{x[0-9]+}}]
+entry:
+  %b = add <4 x i32> %a, <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i32> %b
+}
+
+define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) {
+; CHECK-LABEL: test_ld_from_poll_v2i64
+; CHECK: adr {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK-NEXT: ld1 {{{v[0-9]+}}.2d}, [{{x[0-9]+}}]
+entry:
+  %b = add <2 x i64> %a, <i64 1, i64 2>
+  ret <2 x i64> %b
+}
+
+define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) {
+; CHECK-LABEL: test_ld_from_poll_v8i8
+; CHECK: adr {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK-NEXT: ld1 {{{v[0-9]+}}.8b}, [{{x[0-9]+}}]
+entry:
+  %b = add <8 x i8> %a, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
+  ret <8 x i8> %b
+}
+
+define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) {
+; CHECK-LABEL: test_ld_from_poll_v4i16
+; CHECK: adr {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK-NEXT: ld1 {{{v[0-9]+}}.4h}, [{{x[0-9]+}}]
+entry:
+  %b = add <4 x i16> %a, <i16 1, i16 2, i16 3, i16 4>
+  ret <4 x i16> %b
+}
+
+define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) {
+; CHECK-LABEL: test_ld_from_poll_v2i32
+; CHECK: adr {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK-NEXT: ld1 {{{v[0-9]+}}.2s}, [{{x[0-9]+}}]
+entry:
+  %b = add <2 x i32> %a, <i32 1, i32 2>
+  ret <2 x i32> %b
+}
+
+define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) {
+; CHECK-LABEL: test_ld_from_poll_v4f32
+; CHECK: adr {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK-NEXT: ld1 {{{v[0-9]+}}.4s}, [{{x[0-9]+}}]
+entry:
+  %b = fadd <4 x float> %a, <float 1.0, float 2.0, float 3.0, float 4.0>
+  ret <4 x float> %b
+}
+
+define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_ld_from_poll_v2f64
+; CHECK: adr {{x[0-9]+}}, .{{[A-Z0-9_]+}}
+; CHECK-NEXT: ld1 {{{v[0-9]+}}.2d}, [{{x[0-9]+}}]
+entry:
+  %b = fadd <2 x double> %a, <double 1.0, double 2.0>
+  ret <2 x double> %b
+}
+
 declare { <16 x i8>, <16 x i8> } @llvm.arm.neon.vld2lane.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
 declare { <8 x i16>, <8 x i16> } @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)
 declare { <4 x i32>, <4 x i32> } @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)