Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1252,7 +1252,7 @@
         Elm = PointerTy.getTypeForEVT(Ty->getContext());
       }
       return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
-                              VTy->getNumElements());
+                              VTy->getElementCount());
     }

     return getValueType(DL, Ty, AllowUnknown);
Index: llvm/lib/Analysis/Loads.cpp
===================================================================
--- llvm/lib/Analysis/Loads.cpp
+++ llvm/lib/Analysis/Loads.cpp
@@ -140,7 +140,7 @@
                                               const DataLayout &DL,
                                               const Instruction *CtxI,
                                               const DominatorTree *DT) {
-  if (!Ty->isSized())
+  if (!Ty->isSized() || (Ty->isVectorTy() && Ty->getVectorIsScalable()))
     return false;

   // When dereferenceability information is provided by a dereferenceable
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6770,6 +6770,9 @@
                                 const TargetLowering &TLI) {
   // Handle simple but common cases only.
   Type *StoreType = SI.getValueOperand()->getType();
+  if (StoreType->isVectorTy() && StoreType->getVectorIsScalable())
+    return false;
+
   if (!DL.typeSizeEqualsStoreSize(StoreType) ||
       DL.getTypeSizeInBits(StoreType) == 0)
     return false;
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15692,6 +15692,9 @@
     return false;

   EVT MemVT = St->getMemoryVT();
+  if (MemVT.isScalableVector())
+    return false;
+
   int64_t ElementSizeBytes = MemVT.getStoreSize();
   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
@@ -20742,9 +20745,12 @@
                          : (LSN->getAddressingMode() == ISD::PRE_DEC)
                                ? -1 * C->getSExtValue()
                                : 0;
+      uint64_t Size = LSN->getMemoryVT().isScalableVector()
+                          ? MemoryLocation::UnknownSize
+                          : LSN->getMemoryVT().getStoreSize();
       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
               Offset /*base offset*/,
-              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+              Optional<int64_t>(Size),
               LSN->getMemOperand()};
     }
     if (const auto *LN = cast<LifetimeSDNode>(N))
@@ -21024,6 +21030,9 @@
   if (BasePtr.getBase().isUndef())
     return false;

+  if (St->getMemoryVT().isScalableVector())
+    return false;
+
   // Add ST's interval.
   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
@@ -6722,9 +6723,11 @@
   if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);

+  uint64_t Size = MemVT.isScalableVector() ? MemoryLocation::UnknownSize
+                                           : MemVT.getStoreSize();
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);
+      PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges);
   return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
 }
@@ -6844,8 +6847,11 @@
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);

   MachineFunction &MF = getMachineFunction();
-  MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
+  EVT MemVT = Val.getValueType();
+  uint64_t Size = MemVT.isScalableVector() ? MemoryLocation::UnknownSize
+                                           : MemVT.getStoreSize();
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
   return getStore(Chain, dl, Val, Ptr, MMO);
 }
Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -184,6 +184,8 @@
   void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

+  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
+
   void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
@@ -1314,6 +1316,26 @@
   ReplaceNode(N, St);
 }

+bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
+                                                      SDValue &OffImm) {
+  SDLoc dl(N);
+
+  // If this is not a frame index, load directly from this address.
+  if (N->getOpcode() != ISD::FrameIndex) {
+    Base = N;
+    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+    return true;
+  }
+
+  // Otherwise, match it for the frame address.
+  const DataLayout &DL = CurDAG->getDataLayout();
+  const TargetLowering *TLI = getTargetLowering();
+  int FI = cast<FrameIndexSDNode>(N)->getIndex();
+  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+  return true;
+}
+
 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                           unsigned Opc) {
   SDLoc dl(N);
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9175,11 +9175,15 @@
   if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
     return false;

+  // FIXME: Update this method to support scalable addressing modes.
+  if (Ty->isVectorTy() && Ty->getVectorIsScalable())
+    return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale;
+
   // check reg + imm case:
   // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
   uint64_t NumBytes = 0;
   if (Ty->isSized()) {
-    uint64_t NumBits = DL.getTypeSizeInBits(Ty);
+    uint64_t NumBits = DL.getTypeSizeInBits(Ty).getKnownMinSize();
     NumBytes = NumBits / 8;
     if (!isPowerOf2_64(NumBits))
       NumBytes = 0;
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -349,6 +349,8 @@
   let PrintMethod = "printImmScale<16>";
 }

+def am_sve_fi : ComplexPattern<i64, 2, "SelectAddrModeFrameIndexSVE", []>;
+
 def am_indexed7s8   : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;
 def am_indexed7s16  : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;
 def am_indexed7s32  : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1683,6 +1683,8 @@
   case AArch64::STRSui:
   case AArch64::STRDui:
   case AArch64::STRQui:
+  case AArch64::LDR_PXI:
+  case AArch64::STR_PXI:
     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
       FrameIndex = MI.getOperand(1).getIndex();
@@ -1795,9 +1797,19 @@
   case AArch64::STNPSi:
   case AArch64::LDG:
   case AArch64::STGPi:
+  case AArch64::LD1B_IMM:
+  case AArch64::LD1H_IMM:
+  case AArch64::LD1W_IMM:
+  case AArch64::LD1D_IMM:
+  case AArch64::ST1B_IMM:
+  case AArch64::ST1H_IMM:
+  case AArch64::ST1W_IMM:
+  case AArch64::ST1D_IMM:
     return 3;
   case AArch64::ADDG:
   case AArch64::STGOffset:
+  case AArch64::LDR_PXI:
+  case AArch64::STR_PXI:
     return 2;
   }
 }
@@ -2043,6 +2055,7 @@
 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
                                     unsigned &Width, int64_t &MinOffset,
                                     int64_t &MaxOffset) {
+  const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
   switch (Opcode) {
   // Not a memory operation or something we want to handle.
   default:
@@ -2206,10 +2219,26 @@
     break;
   case AArch64::LDR_ZXI:
   case AArch64::STR_ZXI:
-    Scale = Width = 16;
+    Scale = 16;
+    Width = SVEMaxBytesPerVector;
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::LD1B_IMM:
+  case AArch64::LD1H_IMM:
+  case AArch64::LD1W_IMM:
+  case AArch64::LD1D_IMM:
+  case AArch64::ST1B_IMM:
+  case AArch64::ST1H_IMM:
+  case AArch64::ST1W_IMM:
+  case AArch64::ST1D_IMM:
+    // A full vector's worth of data
+    // Width = mbytes * elements
+    Scale = 16;
+    Width = SVEMaxBytesPerVector;
+    MinOffset = -8;
+    MaxOffset = 7;
+    break;
   case AArch64::ST2GOffset:
   case AArch64::STZ2GOffset:
     Scale = 16;
@@ -3381,6 +3410,14 @@
   case AArch64::STR_ZXI:
   case AArch64::LDR_PXI:
   case AArch64::STR_PXI:
+  case AArch64::LD1B_IMM:
+  case AArch64::LD1H_IMM:
+  case AArch64::LD1W_IMM:
+  case AArch64::LD1D_IMM:
+  case AArch64::ST1B_IMM:
+  case AArch64::ST1H_IMM:
+  case AArch64::ST1W_IMM:
+  case AArch64::ST1D_IMM:
     return true;
   default:
     return false;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1154,6 +1154,48 @@
   // 16-element contiguous stores
   defm : pred_store;

+  multiclass spill_fill_predicate<ValueType Ty, Instruction Load,
+                                  Instruction Store> {
+    // reg + imm (frame-index)
+    def : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))),
+              (Load GPR64sp:$base, simm9:$offset)>;
+
+    def : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),
+              (Store PPR:$val, GPR64sp:$base, simm9:$offset)>;
+  }
+
+  let Predicates = [IsLE] in {
+    defm Pat_SpillFill_P16 : spill_fill_predicate<nxv16i1, LDR_PXI, STR_PXI>;
+    defm Pat_SpillFill_P8  : spill_fill_predicate<nxv8i1,  LDR_PXI, STR_PXI>;
+    defm Pat_SpillFill_P4  : spill_fill_predicate<nxv4i1,  LDR_PXI, STR_PXI>;
+    defm Pat_SpillFill_P2  : spill_fill_predicate<nxv2i1,  LDR_PXI, STR_PXI>;
+  }
+
+  multiclass unpred_store<ValueType Ty, Instruction RegImmInst,
+                          Instruction PTrue> {
+    // reg + imm (frame-index)
+    def _reg_imm : Pat<(store (Ty ZPR:$val),
+                              (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
+                       (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
+  }
+
+  defm Pat_ST1B         : unpred_store<nxv16i8, ST1B_IMM, PTRUE_B>;
+  defm Pat_ST1H         : unpred_store<nxv8i16, ST1H_IMM, PTRUE_H>;
+  defm Pat_ST1W         : unpred_store<nxv4i32, ST1W_IMM, PTRUE_S>;
+  defm Pat_ST1D         : unpred_store<nxv2i64, ST1D_IMM, PTRUE_D>;
+  defm Pat_ST1H_float16 : unpred_store<nxv8f16, ST1H_IMM, PTRUE_H>;
+  defm Pat_ST1W_float   : unpred_store<nxv4f32, ST1W_IMM, PTRUE_S>;
+  defm Pat_ST1D_double  : unpred_store<nxv2f64, ST1D_IMM, PTRUE_D>;
+
+  multiclass unpred_load<ValueType Ty, Instruction RegImmInst,
+                         Instruction PTrue> {
+    // reg + imm (frame-index)
+    def _reg_imm : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
+                       (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
+  }
+
+  defm Pat_LD1B         : unpred_load<nxv16i8, LD1B_IMM, PTRUE_B>;
+  defm Pat_LD1H         : unpred_load<nxv8i16, LD1H_IMM, PTRUE_H>;
+  defm Pat_LD1W         : unpred_load<nxv4i32, LD1W_IMM, PTRUE_S>;
+  defm Pat_LD1D         : unpred_load<nxv2i64, LD1D_IMM, PTRUE_D>;
+  defm Pat_LD1H_float16 : unpred_load<nxv8f16, LD1H_IMM, PTRUE_H>;
+  defm Pat_LD1W_float   : unpred_load<nxv4f32, LD1W_IMM, PTRUE_S>;
+  defm Pat_LD1D_double  : unpred_load<nxv2f64, LD1D_IMM, PTRUE_D>;
 }

 let Predicates = [HasSVE2] in {
Index: llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
===================================================================
--- llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -652,6 +652,7 @@
 // in index i*P of a vector. The other elements of the
 // vector (such as index 1) are undefined.
 static constexpr unsigned SVEBitsPerBlock = 128;
+static constexpr unsigned SVEMaxBitsPerVector = 2048;

 } // end namespace AArch64
 } // end namespace llvm
Index: llvm/test/CodeGen/AArch64/spillfill-sve.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/spillfill-sve.ll
@@ -0,0 +1,189 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; This file checks that unpredicated load/store instructions to locals
+; use the right instructions and offsets.
+
+; Data fills
+
+define void @fill_nxv16i8() {
+; CHECK-LABEL: fill_nxv16i8
+; CHECK-DAG: ld1b { z{{[01]}}.b }, p0/z, [sp]
+; CHECK-DAG: ld1b { z{{[01]}}.b }, p0/z, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 16 x i8>
+  %local1 = alloca <vscale x 16 x i8>
+  load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %local0
+  load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %local1
+  ret void
+}
+
+define void @fill_nxv8i16() {
+; CHECK-LABEL: fill_nxv8i16
+; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp]
+; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 8 x i16>
+  %local1 = alloca <vscale x 8 x i16>
+  load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %local0
+  load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %local1
+  ret void
+}
+
+define void @fill_nxv4i32() {
+; CHECK-LABEL: fill_nxv4i32
+; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp]
+; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 4 x i32>
+  %local1 = alloca <vscale x 4 x i32>
+  load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %local0
+  load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %local1
+  ret void
+}
+
+define void @fill_nxv2i64() {
+; CHECK-LABEL: fill_nxv2i64
+; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp]
+; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 2 x i64>
+  %local1 = alloca <vscale x 2 x i64>
+  load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %local0
+  load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %local1
+  ret void
+}
+
+
+; Data spills
+
+define void @spill_nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) {
+; CHECK-LABEL: spill_nxv16i8
+; CHECK-DAG: st1b { z{{[01]}}.b }, p0, [sp]
+; CHECK-DAG: st1b { z{{[01]}}.b }, p0, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 16 x i8>
+  %local1 = alloca <vscale x 16 x i8>
+  store volatile <vscale x 16 x i8> %v0, <vscale x 16 x i8>* %local0
+  store volatile <vscale x 16 x i8> %v1, <vscale x 16 x i8>* %local1
+  ret void
+}
+
+define void @spill_nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1) {
+; CHECK-LABEL: spill_nxv8i16
+; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp]
+; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 8 x i16>
+  %local1 = alloca <vscale x 8 x i16>
+  store volatile <vscale x 8 x i16> %v0, <vscale x 8 x i16>* %local0
+  store volatile <vscale x 8 x i16> %v1, <vscale x 8 x i16>* %local1
+  ret void
+}
+
+define void @spill_nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1) {
+; CHECK-LABEL: spill_nxv4i32
+; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp]
+; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 4 x i32>
+  %local1 = alloca <vscale x 4 x i32>
+  store volatile <vscale x 4 x i32> %v0, <vscale x 4 x i32>* %local0
+  store volatile <vscale x 4 x i32> %v1, <vscale x 4 x i32>* %local1
+  ret void
+}
+
+define void @spill_nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1) {
+; CHECK-LABEL: spill_nxv2i64
+; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp]
+; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 2 x i64>
+  %local1 = alloca <vscale x 2 x i64>
+  store volatile <vscale x 2 x i64> %v0, <vscale x 2 x i64>* %local0
+  store volatile <vscale x 2 x i64> %v1, <vscale x 2 x i64>* %local1
+  ret void
+}
+
+; Predicate fills
+
+define void @fill_nxv16i1() {
+; CHECK-LABEL: fill_nxv16i1
+; CHECK-DAG: ldr p{{[01]}}, [sp, #8, mul vl]
+; CHECK-DAG: ldr p{{[01]}}, [sp]
+  %local0 = alloca <vscale x 16 x i1>
+  %local1 = alloca <vscale x 16 x i1>
+  load volatile <vscale x 16 x i1>, <vscale x 16 x i1>* %local0
+  load volatile <vscale x 16 x i1>, <vscale x 16 x i1>* %local1
+  ret void
+}
+
+define void @fill_nxv8i1() {
+; CHECK-LABEL: fill_nxv8i1
+; CHECK-DAG: ldr p{{[01]}}, [sp, #4, mul vl]
+; CHECK-DAG: ldr p{{[01]}}, [sp]
+  %local0 = alloca <vscale x 8 x i1>
+  %local1 = alloca <vscale x 8 x i1>
+  load volatile <vscale x 8 x i1>, <vscale x 8 x i1>* %local0
+  load volatile <vscale x 8 x i1>, <vscale x 8 x i1>* %local1
+  ret void
+}
+
+define void @fill_nxv4i1() {
+; CHECK-LABEL: fill_nxv4i1
+; CHECK-DAG: ldr p{{[01]}}, [sp, #6, mul vl]
+; CHECK-DAG: ldr p{{[01]}}, [sp, #4, mul vl]
+  %local0 = alloca <vscale x 4 x i1>
+  %local1 = alloca <vscale x 4 x i1>
+  load volatile <vscale x 4 x i1>, <vscale x 4 x i1>* %local0
+  load volatile <vscale x 4 x i1>, <vscale x 4 x i1>* %local1
+  ret void
+}
+
+define void @fill_nxv2i1() {
+; CHECK-LABEL: fill_nxv2i1
+; CHECK-DAG: ldr p{{[01]}}, [sp, #7, mul vl]
+; CHECK-DAG: ldr p{{[01]}}, [sp, #6, mul vl]
+  %local0 = alloca <vscale x 2 x i1>
+  %local1 = alloca <vscale x 2 x i1>
+  load volatile <vscale x 2 x i1>, <vscale x 2 x i1>* %local0
+  load volatile <vscale x 2 x i1>, <vscale x 2 x i1>* %local1
+  ret void
+}
+
+; Predicate spills
+
+define void @spill_nxv16i1(<vscale x 16 x i1> %v0, <vscale x 16 x i1> %v1) {
+; CHECK-LABEL: spill_nxv16i1
+; CHECK-DAG: str p{{[01]}}, [sp, #8, mul vl]
+; CHECK-DAG: str p{{[01]}}, [sp]
+  %local0 = alloca <vscale x 16 x i1>
+  %local1 = alloca <vscale x 16 x i1>
+  store volatile <vscale x 16 x i1> %v0, <vscale x 16 x i1>* %local0
+  store volatile <vscale x 16 x i1> %v1, <vscale x 16 x i1>* %local1
+  ret void
+}
+
+define void @spill_nxv8i1(<vscale x 8 x i1> %v0, <vscale x 8 x i1> %v1) {
+; CHECK-LABEL: spill_nxv8i1
+; CHECK-DAG: str p{{[01]}}, [sp, #4, mul vl]
+; CHECK-DAG: str p{{[01]}}, [sp]
+  %local0 = alloca <vscale x 8 x i1>
+  %local1 = alloca <vscale x 8 x i1>
+  store volatile <vscale x 8 x i1> %v0, <vscale x 8 x i1>* %local0
+  store volatile <vscale x 8 x i1> %v1, <vscale x 8 x i1>* %local1
+  ret void
+}
+
+define void @spill_nxv4i1(<vscale x 4 x i1> %v0, <vscale x 4 x i1> %v1) {
+; CHECK-LABEL: spill_nxv4i1
+; CHECK-DAG: str p{{[01]}}, [sp, #6, mul vl]
+; CHECK-DAG: str p{{[01]}}, [sp, #4, mul vl]
+  %local0 = alloca <vscale x 4 x i1>
+  %local1 = alloca <vscale x 4 x i1>
+  store volatile <vscale x 4 x i1> %v0, <vscale x 4 x i1>* %local0
+  store volatile <vscale x 4 x i1> %v1, <vscale x 4 x i1>* %local1
+  ret void
+}
+
+define void @spill_nxv2i1(<vscale x 2 x i1> %v0, <vscale x 2 x i1> %v1) {
+; CHECK-LABEL: spill_nxv2i1
+; CHECK-DAG: str p{{[01]}}, [sp, #7, mul vl]
+; CHECK-DAG: str p{{[01]}}, [sp, #6, mul vl]
+  %local0 = alloca <vscale x 2 x i1>
+  %local1 = alloca <vscale x 2 x i1>
+  store volatile <vscale x 2 x i1> %v0, <vscale x 2 x i1>* %local0
+  store volatile <vscale x 2 x i1> %v1, <vscale x 2 x i1>* %local1
+  ret void
+}