Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14194,9 +14194,10 @@
       uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
       EVT ResVT = N->getValueType(0);
       uint64_t NumLanes = ResVT.getVectorElementCount().Min;
+      // Subvector indices must be built in the target's vector index type.
+      SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
       SDValue Val =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1,
-                      DAG.getConstant(IdxConst * NumLanes, DL, MVT::i32));
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
       return DAG.getMergeValues({Val, Chain}, DL);
     }
     case Intrinsic::aarch64_sve_tuple_set: {
@@ -14222,9 +14223,10 @@
         if (I == IdxConst)
           Opnds.push_back(Vec);
         else {
+          SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
           Opnds.push_back(
               DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Vec.getValueType(), Tuple,
-                          DAG.getConstant(I * NumLanes, DL, MVT::i32)));
+                          ExtIdx));
         }
       }
       SDValue Concat =
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1131,19 +1131,29 @@
   defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;
 
   // Extract lo/hi halves of legal predicate types.
-  def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))),
+  def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (iPTR 0))),
             (ZIP1_PPP_S PPR:$Ps, (PFALSE))>;
-  def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))),
+  def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (iPTR 2))),
             (ZIP2_PPP_S PPR:$Ps, (PFALSE))>;
-  def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
+  def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (iPTR 0))),
             (ZIP1_PPP_H PPR:$Ps, (PFALSE))>;
-  def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
+  def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (iPTR 4))),
             (ZIP2_PPP_H PPR:$Ps, (PFALSE))>;
-  def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+  def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (iPTR 0))),
             (ZIP1_PPP_B PPR:$Ps, (PFALSE))>;
-  def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+  def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (iPTR 8))),
             (ZIP2_PPP_B PPR:$Ps, (PFALSE))>;
 
+  // Extract the lo/hi halves of FP SVE vectors.
+  def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (iPTR 0))),
+            (UUNPKLO_ZZ_S ZPR:$Zs)>;
+  def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (iPTR 4))),
+            (UUNPKHI_ZZ_S ZPR:$Zs)>;
+  def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (iPTR 0))),
+            (UUNPKLO_ZZ_D ZPR:$Zs)>;
+  def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (iPTR 2))),
+            (UUNPKHI_ZZ_D ZPR:$Zs)>;
+
   // Concatenate two predicates.
   def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
             (UZP1_PPP_S $p1, $p2)>;
Index: llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
+++ llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
@@ -28,5 +28,43 @@
   ret %ext
 }
 
+define <vscale x 4 x half> @extract_lo_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
+; CHECK-LABEL: extract_lo_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 0)
+  ret <vscale x 4 x half> %ext
+}
+
+define <vscale x 4 x half> @extract_hi_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
+; CHECK-LABEL: extract_hi_nxv4f16_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 1)
+  ret <vscale x 4 x half> %ext
+}
+
+define <vscale x 2 x float> @extract_lo_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
+; CHECK-LABEL: extract_lo_nxv2f32_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 0)
+  ret <vscale x 2 x float> %ext
+}
+
+define <vscale x 2 x float> @extract_hi_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
+; CHECK-LABEL: extract_hi_nxv2f32_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    ret
+  %ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 1)
+  ret <vscale x 2 x float> %ext
+}
+
 declare @llvm.aarch64.sve.tuple.get.nxv4i64(, i32)
 declare @llvm.aarch64.sve.tuple.get.nxv32i8(, i32)
+declare <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float>, i32)
+declare <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half>, i32)