Index: llvm/include/llvm/CodeGen/MachineFunction.h
===================================================================
--- llvm/include/llvm/CodeGen/MachineFunction.h
+++ llvm/include/llvm/CodeGen/MachineFunction.h
@@ -815,6 +815,14 @@
   MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
                                           int64_t Offset, uint64_t Size);
 
+  /// Allocate a new MachineMemOperand by copying an existing one,
+  /// replacing only the MachinePointerInfo and size. MachineMemOperands
+  /// are owned by the MachineFunction and need not be explicitly
+  /// deallocated.
+  MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
+                                          MachinePointerInfo &PtrInfo,
+                                          uint64_t Size);
+
   /// Allocate a new MachineMemOperand by copying an existing one,
   /// replacing only AliasAnalysis information. MachineMemOperands are owned
   /// by the MachineFunction and need not be explicitly deallocated.
Index: llvm/lib/CodeGen/MachineFunction.cpp
===================================================================
--- llvm/lib/CodeGen/MachineFunction.cpp
+++ llvm/lib/CodeGen/MachineFunction.cpp
@@ -474,6 +474,14 @@
                                              SSID, Ordering, FailureOrdering);
 }
 
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+    const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) {
+  return new (Allocator) MachineMemOperand(
+      PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(),
+      nullptr, MMO->getSyncScopeID(), MMO->getOrdering(),
+      MMO->getFailureOrdering());
+}
+
 MachineMemOperand *
 MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
                                       int64_t Offset, uint64_t Size) {
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19312,16 +19312,18 @@
     return SDValue();
 
   unsigned Index = ExtIdx->getZExtValue();
-  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumElts = VT.getVectorMinNumElements();
 
   // If the index is a multiple of the extract element count, we can offset the
   // address by the store size multiplied by the subvector index. Otherwise if
   // the scalar type is byte sized, we can just use the index multiplied by
   // the element size in bytes as the offset.
-  unsigned Offset;
+
+  // It's fine to use TypeSize here as we know the offset will not be negative.
+  TypeSize Offset = TypeSize::Fixed(0);
   if (Index % NumElts == 0)
-    Offset = (Index / NumElts) * VT.getStoreSize();
-  else if (VT.getScalarType().isByteSized())
+    Offset = VT.getStoreSize() * (Index / NumElts);
+  else if (!VT.isScalableVector() && VT.getScalarType().isByteSized())
     Offset = Index * VT.getScalarType().getStoreSize();
   else
     return SDValue();
@@ -19333,13 +19335,23 @@
   // The narrow load will be offset from the base address of the old load if
   // we are extracting from something besides index 0 (little-endian).
   SDLoc DL(Extract);
   SDValue BaseAddr = Ld->getBasePtr();
 
   // TODO: Use "BaseIndexOffset" to make this more effective.
-  SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+  SDValue NewAddr =
+      DAG.getMemBasePlusOffset(BaseAddr, Offset.getKnownMinSize(), DL,
+                               SDNodeFlags(), Offset.isScalable());
+
+  uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
   MachineFunction &MF = DAG.getMachineFunction();
-  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
-                                                   VT.getStoreSize());
+  MachineMemOperand *MMO;
+  if (Offset.isScalable()) {
+    MachinePointerInfo MPI =
+        MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
+    MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
+  } else
+    MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, StoreSize);
+
   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
   return NewLd;
Index: llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
+++ llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
@@ -28,5 +28,18 @@
   ret <vscale x 16 x i8> %ext
 }
 
+
+define <vscale x 4 x float> @load_extract_nxv4f32_nxv8f32(<vscale x 8 x float>* %p) {
+; CHECK-LABEL: load_extract_nxv4f32_nxv8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
+  %tmp1 = load <vscale x 8 x float>, <vscale x 8 x float>* %p, align 16
+  %tmp2 = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv8f32(<vscale x 8 x float> %tmp1, i32 1)
+  ret <vscale x 4 x float> %tmp2
+}
+
 declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64>, i32)
 declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv8f32(<vscale x 8 x float>, i32)
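
Note for reviewers: a minimal standalone sketch of the scalable-offset logic above, in case it helps. This is not LLVM code; the TS struct and the constants are stand-ins invented for illustration, modelling llvm::TypeSize as a known-minimum byte count plus a scalable flag. It walks the new test's nxv8f32 -> nxv4f32 extract through the combine's offset computation.

// Standalone model of the offset computation in narrowExtractedVectorLoad.
// "TS" is a stand-in for llvm::TypeSize invented for this sketch: a
// known-minimum byte count plus a flag saying the real value is that
// minimum multiplied by vscale.
#include <cstdint>
#include <cstdio>

struct TS {
  uint64_t KnownMin; // byte count when vscale == 1
  bool Scalable;     // true => real byte count is KnownMin * vscale
};

int main() {
  // Values matching the new test: extracting nxv4f32 at element index 4
  // from an nxv8f32 load (tuple.get index 1).
  unsigned Index = 4;        // constant index of the extract_subvector
  unsigned NumElts = 4;      // VT.getVectorMinNumElements() for nxv4f32
  TS StoreSize = {16, true}; // VT.getStoreSize() for nxv4f32

  if (Index % NumElts == 0) {
    // Same shape as "Offset = VT.getStoreSize() * (Index / NumElts)": the
    // multiplication scales the known-minimum size and keeps the flag.
    TS Offset = {StoreSize.KnownMin * (Index / NumElts), StoreSize.Scalable};
    // The combine hands getMemBasePlusOffset only these two pieces, which
    // AArch64 then folds into the "[x0, #1, mul vl]" addressing mode.
    printf("offset = %llu bytes, scaled by vscale: %s\n",
           (unsigned long long)Offset.KnownMin,
           Offset.Scalable ? "yes" : "no");
  }
  return 0;
}

Because that byte offset is only known as a multiple of vscale, the scalable path in the patch also rebuilds the memory operand from just the address space and reports its size as MemoryLocation::UnknownSize via getSizeOrUnknown.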