diff --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
@@ -0,0 +1,243 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 < %s -mtriple aarch64-apple-darwin | FileCheck %s
+; Vector loads tagged !nontemporal. 256-bit cases first: despite the ldnp names, the expected output is one plain ldp of q0/q1.
+define <4 x double> @test_ldnp_v4f64(<4 x double>* %A) {
+; CHECK-LABEL: test_ldnp_v4f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <4 x double>, <4 x double>* %A, align 8, !nontemporal !0
+  ret <4 x double> %lv
+}
+
+define <4 x i64> @test_ldnp_v4i64(<4 x i64>* %A) {
+; CHECK-LABEL: test_ldnp_v4i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <4 x i64>, <4 x i64>* %A, align 8, !nontemporal !0
+  ret <4 x i64> %lv
+}
+define <8 x i32> @test_ldnp_v8i32(<8 x i32>* %A) {
+; CHECK-LABEL: test_ldnp_v8i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <8 x i32>, <8 x i32>* %A, align 8, !nontemporal !0
+  ret <8 x i32> %lv
+}
+
+define <8 x float> @test_ldnp_v8f32(<8 x float>* %A) {
+; CHECK-LABEL: test_ldnp_v8f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <8 x float>, <8 x float>* %A, align 8, !nontemporal !0
+  ret <8 x float> %lv
+}
+
+define <16 x i16> @test_ldnp_v16i16(<16 x i16>* %A) {
+; CHECK-LABEL: test_ldnp_v16i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <16 x i16>, <16 x i16>* %A, align 8, !nontemporal !0
+  ret <16 x i16> %lv
+}
+
+define <16 x half> @test_ldnp_v16f16(<16 x half>* %A) {
+; CHECK-LABEL: test_ldnp_v16f16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <16 x half>, <16 x half>* %A, align 8, !nontemporal !0
+  ret <16 x half> %lv
+}
+
+define <32 x i8> @test_ldnp_v32i8(<32 x i8>* %A) {
+; CHECK-LABEL: test_ldnp_v32i8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <32 x i8>, <32 x i8>* %A, align 8, !nontemporal !0
+  ret <32 x i8> %lv
+}
+; 128-bit vectors: the expected output is a single "ldr q0".
+define <4 x i32> @test_ldnp_v4i32(<4 x i32>* %A) {
+; CHECK-LABEL: test_ldnp_v4i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load<4 x i32>, <4 x i32>* %A, align 8, !nontemporal !0
+  ret <4 x i32> %lv
+}
+
+define <4 x float> @test_ldnp_v4f32(<4 x float>* %A) {
+; CHECK-LABEL: test_ldnp_v4f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load<4 x float>, <4 x float>* %A, align 8, !nontemporal !0
+  ret <4 x float> %lv
+}
+
+define <8 x i16> @test_ldnp_v8i16(<8 x i16>* %A) {
+; CHECK-LABEL: test_ldnp_v8i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <8 x i16>, <8 x i16>* %A, align 8, !nontemporal !0
+  ret <8 x i16> %lv
+}
+
+define <16 x i8> @test_ldnp_v16i8(<16 x i8>* %A) {
+; CHECK-LABEL: test_ldnp_v16i8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <16 x i8>, <16 x i8>* %A, align 8, !nontemporal !0
+  ret <16 x i8> %lv
+}
+define <2 x double> @test_ldnp_v2f64(<2 x double>* %A) {
+; CHECK-LABEL: test_ldnp_v2f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <2 x double>, <2 x double>* %A, align 8, !nontemporal !0
+  ret <2 x double> %lv
+}
+; 64-bit vectors: the expected output is a single "ldr d0".
+define <2 x i32> @test_ldnp_v2i32(<2 x i32>* %A) {
+; CHECK-LABEL: test_ldnp_v2i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <2 x i32>, <2 x i32>* %A, align 8, !nontemporal !0
+  ret <2 x i32> %lv
+}
+
+define <2 x float> @test_ldnp_v2f32(<2 x float>* %A) {
+; CHECK-LABEL: test_ldnp_v2f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <2 x float>, <2 x float>* %A, align 8, !nontemporal !0
+  ret <2 x float> %lv
+}
+
+define <4 x i16> @test_ldnp_v4i16(<4 x i16>* %A) {
+; CHECK-LABEL: test_ldnp_v4i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <4 x i16>, <4 x i16>* %A, align 8, !nontemporal !0
+  ret <4 x i16> %lv
+}
+
+define <8 x i8> @test_ldnp_v8i8(<8 x i8>* %A) {
+; CHECK-LABEL: test_ldnp_v8i8:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <8 x i8>, <8 x i8>* %A, align 8, !nontemporal !0
+  ret <8 x i8> %lv
+}
+
+define <1 x double> @test_ldnp_v1f64(<1 x double>* %A) {
+; CHECK-LABEL: test_ldnp_v1f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <1 x double>, <1 x double>* %A, align 8, !nontemporal !0
+  ret <1 x double> %lv
+}
+
+define <1 x i64> @test_ldnp_v1i64(<1 x i64>* %A) {
+; CHECK-LABEL: test_ldnp_v1i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldr d0, [x0]
+; CHECK-NEXT:    ret
+  %lv = load <1 x i64>, <1 x i64>* %A, align 8, !nontemporal !0
+  ret <1 x i64> %lv
+}
+; 512-bit vectors: the expected output is two ldp instructions filling q0-q3.
+define <32 x i16> @test_ldnp_v32i16(<32 x i16>* %A) {
+; CHECK-LABEL: test_ldnp_v32i16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ret
+  %lv = load <32 x i16>, <32 x i16>* %A, align 8, !nontemporal !0
+  ret <32 x i16> %lv
+}
+
+define <32 x half> @test_ldnp_v32f16(<32 x half>* %A) {
+; CHECK-LABEL: test_ldnp_v32f16:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ret
+  %lv = load <32 x half>, <32 x half>* %A, align 8, !nontemporal !0
+  ret <32 x half> %lv
+}
+
+define <16 x i32> @test_ldnp_v16i32(<16 x i32>* %A) {
+; CHECK-LABEL: test_ldnp_v16i32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ret
+  %lv = load <16 x i32>, <16 x i32>* %A, align 8, !nontemporal !0
+  ret <16 x i32> %lv
+}
+
+define <16 x float> @test_ldnp_v16f32(<16 x float>* %A) {
+; CHECK-LABEL: test_ldnp_v16f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ret
+  %lv = load <16 x float>, <16 x float>* %A, align 8, !nontemporal !0
+  ret <16 x float> %lv
+}
+; 17 x float (544 bits): the expected output loads the value in pieces and stores them to the address held in x8.
+define <17 x float> @test_ldnp_v17f32(<17 x float>* %A) {
+; CHECK-LABEL: test_ldnp_v17f32:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q1, q2, [x0, #32]
+; CHECK-NEXT:    ldp q3, q4, [x0]
+; CHECK-NEXT:    ldr s0, [x0, #64]
+; CHECK-NEXT:    stp q3, q4, [x8]
+; CHECK-NEXT:    stp q1, q2, [x8, #32]
+; CHECK-NEXT:    str s0, [x8, #64]
+; CHECK-NEXT:    ret
+  %lv = load <17 x float>, <17 x float>* %A, align 8, !nontemporal !0
+  ret <17 x float> %lv
+}
+; 1024-bit vectors: the expected output is four ldp instructions filling q0-q7.
+define <16 x i64> @test_ldnp_v16i64(<16 x i64>* %A) {
+; CHECK-LABEL: test_ldnp_v16i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ldp q4, q5, [x0, #64]
+; CHECK-NEXT:    ldp q6, q7, [x0, #96]
+; CHECK-NEXT:    ret
+  %lv = load <16 x i64>, <16 x i64>* %A, align 8, !nontemporal !0
+  ret <16 x i64> %lv
+}
+
+define <16 x double> @test_ldnp_v16f64(<16 x double>* %A) {
+; CHECK-LABEL: test_ldnp_v16f64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    ldp q2, q3, [x0, #32]
+; CHECK-NEXT:    ldp q4, q5, [x0, #64]
+; CHECK-NEXT:    ldp q6, q7, [x0, #96]
+; CHECK-NEXT:    ret
+  %lv = load <16 x double>, <16 x double>* %A, align 8, !nontemporal !0
+  ret <16 x double> %lv
+}
+
+; Metadata node referenced by the !nontemporal attachment on every load above.
+!0 = !{i32 1}