NOTE(review): this patch was recovered from a flattened copy in which line
breaks and every `<vscale x N x T>` type had been dropped. Types on added
lines and on `declare` lines are restored from the intrinsic name suffixes
(e.g. `.nxv2i1.nxv16i1` = result type, then source vector type). The leading
context line `ret %res` of the second hunk is left as found because its type
cannot be derived from this patch, and blank context lines are placed by hunk
line count only -- TODO confirm both against the tree before applying.
One-line group comments were added to the new TableGen patterns; the first
hunk's new-side line count is adjusted for them (41 -> 44).

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1234,6 +1234,44 @@
   def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
             (PUNPKHI_PP PPR:$Ps)>;
 
+  // Extract a quarter of nxv8i1 as nxv2i1; the innermost unpack runs first.
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
+            (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))),
+            (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
+            (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))),
+            (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+  // Extract a quarter of nxv16i1 as nxv4i1 using two chained unpacks.
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+            (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+            (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+            (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+  def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+            (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+  // Extract an eighth of nxv16i1 as nxv2i1 using three chained unpacks.
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+            (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))),
+            (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+            (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 6))),
+            (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+            (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 10))),
+            (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+            (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+  def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))),
+            (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+
   // Extract subvectors from FP SVE vectors
   def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
             (UUNPKLO_ZZ_D ZPR:$Zs)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-vector.ll
@@ -378,6 +378,179 @@
   ret %res
 }
 
+;
+; Extracting a predicate from a wider predicate, that is more than twice the size.
+;
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_0(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 0)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_2(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 2)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_4(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 4)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_6(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 6)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_8(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 8)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_10(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_10:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 10)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_12(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 12)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv16i1_14(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv16i1_14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1> %vec, i64 14)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_0(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 0)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_2(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 2)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_4(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 4)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 2 x i1> @extract_nxv2i1_nxv8i1_6(<vscale x 8 x i1> %vec) {
+; CHECK-LABEL: extract_nxv2i1_nxv8i1_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %vec, i64 6)
+  ret <vscale x 2 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_0(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 0)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_4(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 4)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_8(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 8)
+  ret <vscale x 4 x i1> %res
+}
+
+define <vscale x 4 x i1> @extract_nxv4i1_nxv16i1_12(<vscale x 16 x i1> %vec) {
+; CHECK-LABEL: extract_nxv4i1_nxv16i1_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %vec, i64 12)
+  ret <vscale x 4 x i1> %res
+}
+
+
 attributes #0 = { vscale_range(2,2) }
 
 declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
@@ -393,3 +566,6 @@
 
 
 declare <vscale x 2 x i8> @llvm.experimental.vector.extract.nxv2i8.nxv32i8(<vscale x 32 x i8>, i64)
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv16i1(<vscale x 16 x i1>, i64)
+declare <vscale x 2 x i1> @llvm.experimental.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1>, i64)
+declare <vscale x 4 x i1> @llvm.experimental.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)