diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td --- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td +++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td @@ -542,3 +542,34 @@ defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">; defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">; } + +// v8.9a/v9.4a FEAT_LRCPC3 patterns +let Predicates = [HasRCPC3, HasNEON] in { + // LDAP1 loads + def : Pat<(vector_insert (v2i64 VecListOne128:$Rd), + (i64 (acquiring_load GPR64sp:$Rn)), VectorIndexD:$idx), + (LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>; + def : Pat<(vector_insert (v2f64 VecListOne128:$Rd), + (f64 (bitconvert (i64 (acquiring_load GPR64sp:$Rn)))), VectorIndexD:$idx), + (LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>; + def : Pat<(v1i64 (scalar_to_vector + (i64 (acquiring_load GPR64sp:$Rn)))), + (EXTRACT_SUBREG (LDAP1 (v2i64 (IMPLICIT_DEF)), (i64 0), GPR64sp:$Rn), dsub)>; + def : Pat<(v1f64 (scalar_to_vector + (f64 (bitconvert (i64 (acquiring_load GPR64sp:$Rn)))))), + (EXTRACT_SUBREG (LDAP1 (v2f64 (IMPLICIT_DEF)), (i64 0), GPR64sp:$Rn), dsub)>; + + // STL1 stores + def : Pat<(releasing_store GPR64sp:$Rn, + (i64 (vector_extract (v2i64 VecListOne128:$Vt), VectorIndexD:$idx))), + (STL1 VecListOne128:$Vt, VectorIndexD:$idx, GPR64sp:$Rn)>; + def : Pat<(releasing_store GPR64sp:$Rn, + (i64 (bitconvert (f64 (vector_extract (v2f64 VecListOne128:$Vt), VectorIndexD:$idx))))), + (STL1 VecListOne128:$Vt, VectorIndexD:$idx, GPR64sp:$Rn)>; + // The v1i64 version of the vstl1_lane_* intrinsic is represented as a + // vector_insert -> vector_extract -> atomic store sequence, which is captured + // by the patterns above. We only need to cover the v1f64 case manually. 
+ def : Pat<(releasing_store GPR64sp:$Rn, + (i64 (bitconvert (v1f64 VecListOne64:$Vt)))), + (STL1 (SUBREG_TO_REG (i64 0), VecListOne64:$Vt, dsub), (i64 0), GPR64sp:$Rn)>; +} diff --git a/llvm/test/CodeGen/AArch64/rcpc3.ll b/llvm/test/CodeGen/AArch64/rcpc3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/rcpc3.ll @@ -0,0 +1,272 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+v8.9a -mattr=+rcpc3 < %s | FileCheck --check-prefix=CHECK-RCPC3 %s +; RUN: llc -mtriple=aarch64-none-eabi -mattr=+v8.9a < %s | FileCheck --check-prefix=CHECK-NO-RCPC3 %s + +define hidden <2 x i64> @test_vldap1q_lane_u64(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vldap1q_lane_u64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: ldap1 { v0.d }[1], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vldap1q_lane_u64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: ldapr x8, [x0] +; CHECK-NO-RCPC3-NEXT: mov v0.d[1], x8 +; CHECK-NO-RCPC3-NEXT: ret + %1 = load atomic i64, ptr %a acquire, align 8 + %vldap1_lane = insertelement <2 x i64> %b, i64 %1, i64 1 + ret <2 x i64> %vldap1_lane +} + +define hidden <2 x i64> @test_vldap1q_lane_s64(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vldap1q_lane_s64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: ldap1 { v0.d }[1], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vldap1q_lane_s64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: ldapr x8, [x0] +; CHECK-NO-RCPC3-NEXT: mov v0.d[1], x8 +; CHECK-NO-RCPC3-NEXT: ret + %1 = load atomic i64, ptr %a acquire, align 8 + %vldap1_lane = insertelement <2 x i64> %b, i64 %1, i64 1 + ret <2 x i64> %vldap1_lane +} + +define hidden nofpclass(nan inf) <2 x double> @test_vldap1q_lane_f64(ptr nocapture noundef readonly %a, <2 x double> noundef 
nofpclass(nan inf) %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vldap1q_lane_f64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: ldap1 { v0.d }[1], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vldap1q_lane_f64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: ldapr x8, [x0] +; CHECK-NO-RCPC3-NEXT: fmov d1, x8 +; CHECK-NO-RCPC3-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NO-RCPC3-NEXT: ret + %1 = load atomic double, ptr %a acquire, align 8 + %vldap1_lane = insertelement <2 x double> %b, double %1, i64 1 + ret <2 x double> %vldap1_lane +} + +define hidden <2 x i64> @test_vldap1q_lane_p64(ptr nocapture noundef readonly %a, <2 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vldap1q_lane_p64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: ldap1 { v0.d }[1], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vldap1q_lane_p64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: ldapr x8, [x0] +; CHECK-NO-RCPC3-NEXT: mov v0.d[1], x8 +; CHECK-NO-RCPC3-NEXT: ret + %1 = load atomic i64, ptr %a acquire, align 8 + %vldap1_lane = insertelement <2 x i64> %b, i64 %1, i64 1 + ret <2 x i64> %vldap1_lane +} + +define hidden <1 x i64> @test_vldap1_lane_u64(ptr nocapture noundef readonly %a, <1 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vldap1_lane_u64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: ldap1 { v0.d }[0], [x0] +; CHECK-RCPC3-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vldap1_lane_u64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: ldapr x8, [x0] +; CHECK-NO-RCPC3-NEXT: fmov d0, x8 +; CHECK-NO-RCPC3-NEXT: ret + %1 = load atomic i64, ptr %a acquire, align 8 + %vldap1_lane = insertelement <1 x i64> poison, i64 %1, i64 0 + ret <1 x i64> %vldap1_lane +} + +define hidden <1 x i64> @test_vldap1_lane_s64(ptr nocapture noundef readonly %a, <1 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vldap1_lane_s64: +; CHECK-RCPC3: 
// %bb.0: +; CHECK-RCPC3-NEXT: ldap1 { v0.d }[0], [x0] +; CHECK-RCPC3-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vldap1_lane_s64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: ldapr x8, [x0] +; CHECK-NO-RCPC3-NEXT: fmov d0, x8 +; CHECK-NO-RCPC3-NEXT: ret + %1 = load atomic i64, ptr %a acquire, align 8 + %vldap1_lane = insertelement <1 x i64> poison, i64 %1, i64 0 + ret <1 x i64> %vldap1_lane +} + +define hidden nofpclass(nan inf) <1 x double> @test_vldap1_lane_f64(ptr nocapture noundef readonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vldap1_lane_f64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: ldap1 { v0.d }[0], [x0] +; CHECK-RCPC3-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vldap1_lane_f64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: ldapr x8, [x0] +; CHECK-NO-RCPC3-NEXT: fmov d0, x8 +; CHECK-NO-RCPC3-NEXT: ret + %1 = load atomic double, ptr %a acquire, align 8 + %vldap1_lane = insertelement <1 x double> poison, double %1, i64 0 + ret <1 x double> %vldap1_lane +} + +define hidden <1 x i64> @test_vldap1_lane_p64(ptr nocapture noundef readonly %a, <1 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vldap1_lane_p64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: ldap1 { v0.d }[0], [x0] +; CHECK-RCPC3-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vldap1_lane_p64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: ldapr x8, [x0] +; CHECK-NO-RCPC3-NEXT: fmov d0, x8 +; CHECK-NO-RCPC3-NEXT: ret + %1 = load atomic i64, ptr %a acquire, align 8 + %vldap1_lane = insertelement <1 x i64> poison, i64 %1, i64 0 + ret <1 x i64> %vldap1_lane +} + +define hidden void @test_vstl1q_lane_u64(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vstl1q_lane_u64: +; 
CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: stl1 { v0.d }[1], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vstl1q_lane_u64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: mov x8, v0.d[1] +; CHECK-NO-RCPC3-NEXT: stlr x8, [x0] +; CHECK-NO-RCPC3-NEXT: ret + %1 = extractelement <2 x i64> %b, i64 1 + store atomic i64 %1, ptr %a release, align 8 + ret void +} + +define hidden void @test_vstl1q_lane_s64(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vstl1q_lane_s64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: stl1 { v0.d }[1], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vstl1q_lane_s64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: mov x8, v0.d[1] +; CHECK-NO-RCPC3-NEXT: stlr x8, [x0] +; CHECK-NO-RCPC3-NEXT: ret + %1 = extractelement <2 x i64> %b, i64 1 + store atomic i64 %1, ptr %a release, align 8 + ret void +} + +define hidden void @test_vstl1q_lane_f64(ptr nocapture noundef writeonly %a, <2 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vstl1q_lane_f64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: stl1 { v0.d }[1], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vstl1q_lane_f64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: mov d0, v0.d[1] +; CHECK-NO-RCPC3-NEXT: fmov x8, d0 +; CHECK-NO-RCPC3-NEXT: stlr x8, [x0] +; CHECK-NO-RCPC3-NEXT: ret + %1 = extractelement <2 x double> %b, i64 1 + store atomic double %1, ptr %a release, align 8 + ret void +} + +define hidden void @test_vstl1q_lane_p64(ptr nocapture noundef writeonly %a, <2 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vstl1q_lane_p64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: stl1 { v0.d }[1], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vstl1q_lane_p64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: mov x8, v0.d[1] +; CHECK-NO-RCPC3-NEXT: stlr x8, [x0] +; CHECK-NO-RCPC3-NEXT: ret + %1 = 
extractelement <2 x i64> %b, i64 1 + store atomic i64 %1, ptr %a release, align 8 + ret void +} + +define hidden void @test_vstl1_lane_u64(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vstl1_lane_u64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-RCPC3-NEXT: stl1 { v0.d }[0], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vstl1_lane_u64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NO-RCPC3-NEXT: fmov x8, d0 +; CHECK-NO-RCPC3-NEXT: stlr x8, [x0] +; CHECK-NO-RCPC3-NEXT: ret + %1 = extractelement <1 x i64> %b, i64 0 + store atomic i64 %1, ptr %a release, align 8 + ret void +} + +define hidden void @test_vstl1_lane_s64(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vstl1_lane_s64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-RCPC3-NEXT: stl1 { v0.d }[0], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vstl1_lane_s64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NO-RCPC3-NEXT: fmov x8, d0 +; CHECK-NO-RCPC3-NEXT: stlr x8, [x0] +; CHECK-NO-RCPC3-NEXT: ret + %1 = extractelement <1 x i64> %b, i64 0 + store atomic i64 %1, ptr %a release, align 8 + ret void +} + +define hidden void @test_vstl1_lane_f64(ptr nocapture noundef writeonly %a, <1 x double> noundef nofpclass(nan inf) %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vstl1_lane_f64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-RCPC3-NEXT: stl1 { v0.d }[0], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vstl1_lane_f64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: fmov x8, d0 +; CHECK-NO-RCPC3-NEXT: stlr x8, [x0] +; CHECK-NO-RCPC3-NEXT: ret + %1 = extractelement <1 x double> %b, i64 0 + store atomic 
double %1, ptr %a release, align 8 + ret void +} + +define hidden void @test_vstl1_lane_p64(ptr nocapture noundef writeonly %a, <1 x i64> noundef %b) local_unnamed_addr { +; CHECK-RCPC3-LABEL: test_vstl1_lane_p64: +; CHECK-RCPC3: // %bb.0: +; CHECK-RCPC3-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-RCPC3-NEXT: stl1 { v0.d }[0], [x0] +; CHECK-RCPC3-NEXT: ret +; +; CHECK-NO-RCPC3-LABEL: test_vstl1_lane_p64: +; CHECK-NO-RCPC3: // %bb.0: +; CHECK-NO-RCPC3-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NO-RCPC3-NEXT: fmov x8, d0 +; CHECK-NO-RCPC3-NEXT: stlr x8, [x0] +; CHECK-NO-RCPC3-NEXT: ret + %1 = extractelement <1 x i64> %b, i64 0 + store atomic i64 %1, ptr %a release, align 8 + ret void +}