diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -815,6 +815,14 @@
   MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
                                           int64_t Offset, uint64_t Size);
 
+  /// getMachineMemOperand - Allocate a new MachineMemOperand by copying
+  /// an existing one, replacing only the MachinePointerInfo and size.
+  /// MachineMemOperands are owned by the MachineFunction and need not be
+  /// explicitly deallocated.
+  MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
+                                          MachinePointerInfo &PtrInfo,
+                                          uint64_t Size);
+
   /// Allocate a new MachineMemOperand by copying an existing one,
   /// replacing only AliasAnalysis information. MachineMemOperands are owned
   /// by the MachineFunction and need not be explicitly deallocated.
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -474,6 +474,13 @@
                                          SSID, Ordering, FailureOrdering);
 }
 
+MachineMemOperand *MachineFunction::getMachineMemOperand(
+    const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) {
+  return new (Allocator) MachineMemOperand(
+      PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(), nullptr,
+      MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering());
+}
+
 MachineMemOperand *
 MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
                                       int64_t Offset, uint64_t Size) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19338,19 +19338,15 @@
     return SDValue();
 
   unsigned Index = ExtIdx->getZExtValue();
-  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumElts = VT.getVectorMinNumElements();
 
-  // If the index is a multiple of the extract element count, we can offset the
-  // address by the store size multiplied by the subvector index. Otherwise if
-  // the scalar type is byte sized, we can just use the index multiplied by
-  // the element size in bytes as the offset.
-  unsigned Offset;
-  if (Index % NumElts == 0)
-    Offset = (Index / NumElts) * VT.getStoreSize();
-  else if (VT.getScalarType().isByteSized())
-    Offset = Index * VT.getScalarType().getStoreSize();
-  else
-    return SDValue();
+  // The definition of EXTRACT_SUBVECTOR states that the index must be a
+  // multiple of the minimum number of elements in the result type.
+  assert(Index % NumElts == 0 && "The extract subvector index is not a "
+                                 "multiple of the result's element count");
+
+  // It's fine to use TypeSize here as we know the offset will not be negative.
+  TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
@@ -19359,14 +19355,21 @@
   // The narrow load will be offset from the base address of the old load if
   // we are extracting from something besides index 0 (little-endian).
   SDLoc DL(Extract);
-  SDValue BaseAddr = Ld->getBasePtr();
 
   // TODO: Use "BaseIndexOffset" to make this more effective.
-  SDValue NewAddr =
-      DAG.getMemBasePlusOffset(BaseAddr, TypeSize::Fixed(Offset), DL);
+  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
+
+  uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
   MachineFunction &MF = DAG.getMachineFunction();
-  MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
-                                                   VT.getStoreSize());
+  MachineMemOperand *MMO;
+  if (Offset.isScalable()) {
+    MachinePointerInfo MPI =
+        MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
+    MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
+  } else
+    MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
+                                  StoreSize);
+
   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
   return NewLd;
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
@@ -64,7 +64,19 @@
   ret <vscale x 4 x half> %ext
 }
 
+define <vscale x 4 x float> @load_extract_nxv4f32_nxv8f32(<vscale x 8 x float>* %p) {
+; CHECK-LABEL: load_extract_nxv4f32_nxv8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
+  %tmp1 = load <vscale x 8 x float>, <vscale x 8 x float>* %p, align 16
+  %tmp2 = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv8f32(<vscale x 8 x float> %tmp1, i32 1)
+  ret <vscale x 4 x float> %tmp2
+}
+
 declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64>, i32)
 declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8>, i32)
 declare <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float>, i32)
 declare <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv8f32(<vscale x 8 x float>, i32)