diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -514,7 +514,8 @@ /// IDX is first scaled by the runtime scaling factor of T. Elements IDX /// through (IDX + num_elements(T) - 1) must be valid VECTOR indices. If this /// condition cannot be determined statically but is false at runtime, then - /// the result vector is undefined. + /// the result vector is undefined. IDX must be a constant of the vector + /// index type, which for most targets is a pointer-sized integer type. /// /// This operation supports extracting a fixed-width vector from a scalable /// vector, but not the other way around. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5560,6 +5560,11 @@ (VT.getVectorMinNumElements() + N2C->getZExtValue()) <= N1VT.getVectorMinNumElements()) && "Extract subvector overflow!"); + assert(N2C->getAPIntValue().getBitWidth() == + TLI->getVectorIdxTy(getDataLayout()) + .getSizeInBits() + .getFixedSize() && + "Constant index for EXTRACT_SUBVECTOR has an invalid size"); // Trivial extraction. 
if (VT == N1VT) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -14235,9 +14235,9 @@ uint64_t IdxConst = cast(Idx)->getZExtValue(); EVT ResVT = N->getValueType(0); uint64_t NumLanes = ResVT.getVectorElementCount().Min; + SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL); SDValue Val = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, - DAG.getConstant(IdxConst * NumLanes, DL, MVT::i32)); + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx); return DAG.getMergeValues({Val, Chain}, DL); } case Intrinsic::aarch64_sve_tuple_set: { @@ -14263,9 +14263,9 @@ if (I == IdxConst) Opnds.push_back(Vec); else { - Opnds.push_back( - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Vec.getValueType(), Tuple, - DAG.getConstant(I * NumLanes, DL, MVT::i32))); + SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL); + Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, + Vec.getValueType(), Tuple, ExtIdx)); } } SDValue Concat = diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1155,6 +1155,16 @@ def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))), (ZIP2_PPP_B PPR:$Ps, (PFALSE))>; + // Extract subvectors from FP SVE vectors + def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_S ZPR:$Zs)>; + def : Pat<(nxv4f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))), + (UUNPKHI_ZZ_S ZPR:$Zs)>; + def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 0))), + (UUNPKLO_ZZ_D ZPR:$Zs)>; + def : Pat<(nxv2f32 (extract_subvector (nxv4f32 ZPR:$Zs), (i64 2))), + (UUNPKHI_ZZ_D ZPR:$Zs)>; + // Concatenate two predicates. 
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)), (UZP1_PPP_S $p1, $p2)>; diff --git a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll @@ -28,5 +28,43 @@ ret %ext } +define @extract_lo_nxv4f16_nxv8f16( %z0) { +; CHECK-LABEL: extract_lo_nxv4f16_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: ret + %ext = call @llvm.aarch64.sve.tuple.get.nxv8f16( %z0, i32 0) + ret %ext +} + +define @extract_hi_nxv4f16_nxv8f16( %z0) { +; CHECK-LABEL: extract_hi_nxv4f16_nxv8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: ret + %ext = call @llvm.aarch64.sve.tuple.get.nxv8f16( %z0, i32 1) + ret %ext +} + +define @extract_lo_nxv2f32_nxv4f32( %z0) { +; CHECK-LABEL: extract_lo_nxv2f32_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: ret + %ext = call @llvm.aarch64.sve.tuple.get.nxv4f32( %z0, i32 0) + ret %ext +} + +define @extract_hi_nxv2f32_nxv4f32( %z0) { +; CHECK-LABEL: extract_hi_nxv2f32_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: ret + %ext = call @llvm.aarch64.sve.tuple.get.nxv4f32( %z0, i32 1) + ret %ext +} + declare @llvm.aarch64.sve.tuple.get.nxv4i64(, i32) declare @llvm.aarch64.sve.tuple.get.nxv32i8(, i32) +declare @llvm.aarch64.sve.tuple.get.nxv4f32(, i32) +declare @llvm.aarch64.sve.tuple.get.nxv8f16(, i32)