Index: llvm/include/llvm/Analysis/MemoryLocation.h
===================================================================
--- llvm/include/llvm/Analysis/MemoryLocation.h
+++ llvm/include/llvm/Analysis/MemoryLocation.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/Support/TypeSize.h"

 namespace llvm {

@@ -240,6 +241,12 @@
     return getForArgument(Call, ArgIdx, &TLI);
   }

+  // Return the exact size if it is known at compile time, otherwise return
+  // MemoryLocation::UnknownSize.
+  static uint64_t getSizeOrUnknown(const TypeSize &T) {
+    return T.isScalable() ? UnknownSize : T.getFixedSize();
+  }
+
   explicit MemoryLocation(const Value *Ptr = nullptr,
                           LocationSize Size = LocationSize::unknown(),
                           const AAMDNodes &AATags = AAMDNodes())
Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1252,7 +1252,7 @@
       Elm = PointerTy.getTypeForEVT(Ty->getContext());
     }
     return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
-                            VTy->getNumElements());
+                            VTy->getElementCount());
   }

   return getValueType(DL, Ty, AllowUnknown);
Index: llvm/lib/Analysis/Loads.cpp
===================================================================
--- llvm/lib/Analysis/Loads.cpp
+++ llvm/lib/Analysis/Loads.cpp
@@ -140,7 +140,9 @@
                                               const DataLayout &DL,
                                               const Instruction *CtxI,
                                               const DominatorTree *DT) {
-  if (!Ty->isSized())
+  // For unsized types or scalable vectors we don't know exactly how many bytes
+  // are dereferenceable, so bail out.
+  if (!Ty->isSized() || (Ty->isVectorTy() && Ty->getVectorIsScalable()))
     return false;

   // When dereferenceability information is provided by a dereferenceable
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6770,6 +6770,14 @@
                                    const TargetLowering &TLI) {
   // Handle simple but common cases only.
   Type *StoreType = SI.getValueOperand()->getType();
+
+  // The code below assumes shifting a value by <number of bits>,
+  // whereas scalable vectors would have to be shifted by
+  // <2log(vscale) + number of bits> in order to store the
+  // low/high parts. Bailing out for now.
+  if (StoreType->isVectorTy() && StoreType->getVectorIsScalable())
+    return false;
+
   if (!DL.typeSizeEqualsStoreSize(StoreType) ||
       DL.getTypeSizeInBits(StoreType) == 0)
     return false;
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15691,7 +15691,12 @@
   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
     return false;

+  // With scalable vectors it is not known how many consecutive stores could
+  // make up a wider one due to vscale being unknown at compile time, so bail out.
   EVT MemVT = St->getMemoryVT();
+  if (MemVT.isScalableVector())
+    return false;
+
   int64_t ElementSizeBytes = MemVT.getStoreSize();
   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

@@ -20742,9 +20747,11 @@
                          : (LSN->getAddressingMode() == ISD::PRE_DEC)
                               ? -1 * C->getSExtValue()
                               : 0;
+      uint64_t Size =
+          MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
               Offset /*base offset*/,
-              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+              Optional<int64_t>(Size),
               LSN->getMemOperand()};
     }
     if (const auto *LN = cast<LoadSDNode>(N))
@@ -21024,6 +21031,12 @@
   if (BasePtr.getBase().isUndef())
     return false;

+  // BaseIndexOffset assumes that offsets are fixed-size, which
+  // is not valid for scalable vectors where the offsets are
+  // scaled by `vscale`, so bail out early.
+  if (St->getMemoryVT().isScalableVector())
+    return false;
+
   // Add ST's interval.
   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
@@ -6722,9 +6723,10 @@
   if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);

+  uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges);
+      PtrInfo, MMOFlags, Size, Alignment, AAInfo, Ranges);
   return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
 }

@@ -6844,8 +6846,10 @@
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);

   MachineFunction &MF = getMachineFunction();
-  MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo);
+  uint64_t Size =
+      MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
   return getStore(Chain, dl, Val, Ptr, MMO);
 }
Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -184,6 +184,8 @@
   void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

+  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
+
   void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
   void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
@@ -1314,6 +1316,19 @@
   ReplaceNode(N, St);
 }

+bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
+                                                      SDValue &OffImm) {
+  SDLoc dl(N);
+
+  // Match the frame index for the frame address.
+  const DataLayout &DL = CurDAG->getDataLayout();
+  const TargetLowering *TLI = getTargetLowering();
+  int FI = cast<FrameIndexSDNode>(N)->getIndex();
+  Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
+  return true;
+}
+
 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                           unsigned Opc) {
   SDLoc dl(N);
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9175,6 +9175,10 @@
   if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
     return false;

+  // FIXME: Update this method to support scalable addressing modes.
+  if (Ty->isVectorTy() && Ty->getVectorIsScalable())
+    return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale;
+
   // check reg + imm case:
   // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
   uint64_t NumBytes = 0;
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -349,6 +349,8 @@
   let PrintMethod = "printImmScale<16>";
 }

+def am_sve_fi : ComplexPattern<i64, 2, "SelectAddrModeFrameIndexSVE", []>;
+
 def am_indexed7s8   : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S8", []>;
 def am_indexed7s16  : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S16", []>;
 def am_indexed7s32  : ComplexPattern<i64, 2, "SelectAddrModeIndexed7S32", []>;
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1683,6 +1683,8 @@
   case AArch64::STRSui:
   case AArch64::STRDui:
   case AArch64::STRQui:
+  case AArch64::LDR_PXI:
+  case AArch64::STR_PXI:
     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
       FrameIndex = MI.getOperand(1).getIndex();
@@ -1795,9 +1797,19 @@
   case AArch64::STNPSi:
   case AArch64::LDG:
   case AArch64::STGPi:
+  case AArch64::LD1B_IMM:
+  case AArch64::LD1H_IMM:
+  case AArch64::LD1W_IMM:
+  case AArch64::LD1D_IMM:
+  case AArch64::ST1B_IMM:
+  case AArch64::ST1H_IMM:
+  case AArch64::ST1W_IMM:
+  case AArch64::ST1D_IMM:
     return 3;
   case AArch64::ADDG:
   case AArch64::STGOffset:
+  case AArch64::LDR_PXI:
+  case AArch64::STR_PXI:
     return 2;
   }
 }
@@ -2043,6 +2055,7 @@
 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
                                     unsigned &Width, int64_t &MinOffset,
                                     int64_t &MaxOffset) {
+  const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
   switch (Opcode) {
   // Not a memory operation or something we want to handle.
   default:
@@ -2200,16 +2213,33 @@
     break;
   case AArch64::LDR_PXI:
   case AArch64::STR_PXI:
-    Scale = Width = 2;
+    Scale = 2;
+    Width = SVEMaxBytesPerVector / 8;
     MinOffset = -256;
     MaxOffset = 255;
     break;
   case AArch64::LDR_ZXI:
   case AArch64::STR_ZXI:
-    Scale = Width = 16;
+    Scale = 16;
+    Width = SVEMaxBytesPerVector;
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::LD1B_IMM:
+  case AArch64::LD1H_IMM:
+  case AArch64::LD1W_IMM:
+  case AArch64::LD1D_IMM:
+  case AArch64::ST1B_IMM:
+  case AArch64::ST1H_IMM:
+  case AArch64::ST1W_IMM:
+  case AArch64::ST1D_IMM:
+    // A full vector's worth of data:
+    // Width = mbytes * elements
+    Scale = 16;
+    Width = SVEMaxBytesPerVector;
+    MinOffset = -8;
+    MaxOffset = 7;
+    break;
   case AArch64::ST2GOffset:
   case AArch64::STZ2GOffset:
     Scale = 16;
@@ -3381,6 +3411,14 @@
   case AArch64::STR_ZXI:
   case AArch64::LDR_PXI:
   case AArch64::STR_PXI:
+  case AArch64::LD1B_IMM:
+  case AArch64::LD1H_IMM:
+  case AArch64::LD1W_IMM:
+  case AArch64::LD1D_IMM:
+  case AArch64::ST1B_IMM:
+  case AArch64::ST1H_IMM:
+  case AArch64::ST1W_IMM:
+  case AArch64::ST1D_IMM:
     return true;
   default:
     return false;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1154,6 +1154,46 @@
   // 16-element contiguous stores
   defm : pred_store;

+  multiclass spill_fill_predicate<ValueType Ty, Instruction Load,
+                                  Instruction Store> {
+    // reg + imm (frame-index)
+    def : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))),
+              (Load GPR64sp:$base, simm9:$offset)>;
+    def : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),
+              (Store PPR:$val, GPR64sp:$base, simm9:$offset)>;
+  }
+
+  defm Pat_SpillFill_P16 : spill_fill_predicate<nxv16i1, LDR_PXI, STR_PXI>;
+  defm Pat_SpillFill_P8  : spill_fill_predicate<nxv8i1,  LDR_PXI, STR_PXI>;
+  defm Pat_SpillFill_P4  : spill_fill_predicate<nxv4i1,  LDR_PXI, STR_PXI>;
+  defm Pat_SpillFill_P2  : spill_fill_predicate<nxv2i1,  LDR_PXI, STR_PXI>;
+
+  multiclass unpred_store<ValueType Ty, Instruction RegImmInst,
+                          Instruction PTrue> {
+    // reg + imm (frame-index)
+    def _reg_imm : Pat<(store (Ty ZPR:$val),
+                              (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
+                       (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
+  }
+
+  defm Pat_ST1B         : unpred_store<nxv16i8, ST1B_IMM, PTRUE_B>;
+  defm Pat_ST1H         : unpred_store<nxv8i16, ST1H_IMM, PTRUE_H>;
+  defm Pat_ST1W         : unpred_store<nxv4i32, ST1W_IMM, PTRUE_S>;
+  defm Pat_ST1D         : unpred_store<nxv2i64, ST1D_IMM, PTRUE_D>;
+  defm Pat_ST1H_float16 : unpred_store<nxv8f16, ST1H_IMM, PTRUE_H>;
+  defm Pat_ST1W_float   : unpred_store<nxv4f32, ST1W_IMM, PTRUE_S>;
+  defm Pat_ST1D_double  : unpred_store<nxv2f64, ST1D_IMM, PTRUE_D>;
+
+  multiclass unpred_load<ValueType Ty, Instruction RegImmInst,
+                         Instruction PTrue> {
+    // reg + imm (frame-index)
+    def _reg_imm : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
+                       (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
+  }
+
+  defm Pat_LD1B         : unpred_load<nxv16i8, LD1B_IMM, PTRUE_B>;
+  defm Pat_LD1H         : unpred_load<nxv8i16, LD1H_IMM, PTRUE_H>;
+  defm Pat_LD1W         : unpred_load<nxv4i32, LD1W_IMM, PTRUE_S>;
+  defm Pat_LD1D         : unpred_load<nxv2i64, LD1D_IMM, PTRUE_D>;
+  defm Pat_LD1H_float16 : unpred_load<nxv8f16, LD1H_IMM, PTRUE_H>;
+  defm Pat_LD1W_float   : unpred_load<nxv4f32, LD1W_IMM, PTRUE_S>;
+  defm Pat_LD1D_double  : unpred_load<nxv2f64, LD1D_IMM, PTRUE_D>;
 }

 let Predicates = [HasSVE2] in {
Index: llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
===================================================================
--- llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -652,6 +652,7 @@
 // in index i*P of a vector. The other elements of the
 // vector (such as index 1) are undefined.
 static constexpr unsigned SVEBitsPerBlock = 128;
+static constexpr unsigned SVEMaxBitsPerVector = 2048;
 } // end namespace AArch64
 } // end namespace llvm
Index: llvm/test/CodeGen/AArch64/spillfill-sve.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/spillfill-sve.ll
@@ -0,0 +1,189 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; This file checks that unpredicated load/store instructions to locals
+; use the right instructions and offsets.
+
+; Data fills
+
+define void @fill_nxv16i8() {
+; CHECK-LABEL: fill_nxv16i8
+; CHECK-DAG: ld1b { z{{[01]}}.b }, p0/z, [sp]
+; CHECK-DAG: ld1b { z{{[01]}}.b }, p0/z, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 16 x i8>
+  %local1 = alloca <vscale x 16 x i8>
+  load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %local0
+  load volatile <vscale x 16 x i8>, <vscale x 16 x i8>* %local1
+  ret void
+}
+
+define void @fill_nxv8i16() {
+; CHECK-LABEL: fill_nxv8i16
+; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp]
+; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 8 x i16>
+  %local1 = alloca <vscale x 8 x i16>
+  load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %local0
+  load volatile <vscale x 8 x i16>, <vscale x 8 x i16>* %local1
+  ret void
+}
+
+define void @fill_nxv4i32() {
+; CHECK-LABEL: fill_nxv4i32
+; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp]
+; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 4 x i32>
+  %local1 = alloca <vscale x 4 x i32>
+  load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %local0
+  load volatile <vscale x 4 x i32>, <vscale x 4 x i32>* %local1
+  ret void
+}
+
+define void @fill_nxv2i64() {
+; CHECK-LABEL: fill_nxv2i64
+; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp]
+; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 2 x i64>
+  %local1 = alloca <vscale x 2 x i64>
+  load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %local0
+  load volatile <vscale x 2 x i64>, <vscale x 2 x i64>* %local1
+  ret void
+}
+
+
+; Data spills
+
+define void @spill_nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) {
+; CHECK-LABEL: spill_nxv16i8
+; CHECK-DAG: st1b { z{{[01]}}.b }, p0, [sp]
+; CHECK-DAG: st1b { z{{[01]}}.b }, p0, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 16 x i8>
+  %local1 = alloca <vscale x 16 x i8>
+  store volatile <vscale x 16 x i8> %v0, <vscale x 16 x i8>* %local0
+  store volatile <vscale x 16 x i8> %v1, <vscale x 16 x i8>* %local1
+  ret void
+}
+
+define void @spill_nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1) {
+; CHECK-LABEL: spill_nxv8i16
+; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp]
+; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 8 x i16>
+  %local1 = alloca <vscale x 8 x i16>
+  store volatile <vscale x 8 x i16> %v0, <vscale x 8 x i16>* %local0
+  store volatile <vscale x 8 x i16> %v1, <vscale x 8 x i16>* %local1
+  ret void
+}
+
+define void @spill_nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1) {
+; CHECK-LABEL: spill_nxv4i32
+; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp]
+; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 4 x i32>
+  %local1 = alloca <vscale x 4 x i32>
+  store volatile <vscale x 4 x i32> %v0, <vscale x 4 x i32>* %local0
+  store volatile <vscale x 4 x i32> %v1, <vscale x 4 x i32>* %local1
+  ret void
+}
+
+define void @spill_nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1) {
+; CHECK-LABEL: spill_nxv2i64
+; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp]
+; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp, #1, mul vl]
+  %local0 = alloca <vscale x 2 x i64>
+  %local1 = alloca <vscale x 2 x i64>
+  store volatile <vscale x 2 x i64> %v0, <vscale x 2 x i64>* %local0
+  store volatile <vscale x 2 x i64> %v1, <vscale x 2 x i64>* %local1
+  ret void
+}
+
+; Predicate fills
+
+define void @fill_nxv16i1() {
+; CHECK-LABEL: fill_nxv16i1
+; CHECK-DAG: ldr p{{[01]}}, [sp, #8, mul vl]
+; CHECK-DAG: ldr p{{[01]}}, [sp]
+  %local0 = alloca <vscale x 16 x i1>
+  %local1 = alloca <vscale x 16 x i1>
+  load volatile <vscale x 16 x i1>, <vscale x 16 x i1>* %local0
+  load volatile <vscale x 16 x i1>, <vscale x 16 x i1>* %local1
+  ret void
+}
+
+define void @fill_nxv8i1() {
+; CHECK-LABEL: fill_nxv8i1
+; CHECK-DAG: ldr p{{[01]}}, [sp, #4, mul vl]
+; CHECK-DAG: ldr p{{[01]}}, [sp]
+  %local0 = alloca <vscale x 8 x i1>
+  %local1 = alloca <vscale x 8 x i1>
+  load volatile <vscale x 8 x i1>, <vscale x 8 x i1>* %local0
+  load volatile <vscale x 8 x i1>, <vscale x 8 x i1>* %local1
+  ret void
+}
+
+define void @fill_nxv4i1() {
+; CHECK-LABEL: fill_nxv4i1
+; CHECK-DAG: ldr p{{[01]}}, [sp, #6, mul vl]
+; CHECK-DAG: ldr p{{[01]}}, [sp, #4, mul vl]
+  %local0 = alloca <vscale x 4 x i1>
+  %local1 = alloca <vscale x 4 x i1>
+  load volatile <vscale x 4 x i1>, <vscale x 4 x i1>* %local0
+  load volatile <vscale x 4 x i1>, <vscale x 4 x i1>* %local1
+  ret void
+}
+
+define void @fill_nxv2i1() {
+; CHECK-LABEL: fill_nxv2i1
+; CHECK-DAG: ldr p{{[01]}}, [sp, #7, mul vl]
+; CHECK-DAG: ldr p{{[01]}}, [sp, #6, mul vl]
+  %local0 = alloca <vscale x 2 x i1>
+  %local1 = alloca <vscale x 2 x i1>
+  load volatile <vscale x 2 x i1>, <vscale x 2 x i1>* %local0
+  load volatile <vscale x 2 x i1>, <vscale x 2 x i1>* %local1
+  ret void
+}
+
+; Predicate spills
+
+define void @spill_nxv16i1(<vscale x 16 x i1> %v0, <vscale x 16 x i1> %v1) {
+; CHECK-LABEL: spill_nxv16i1
+; CHECK-DAG: str p{{[01]}}, [sp, #8, mul vl]
+; CHECK-DAG: str p{{[01]}}, [sp]
+  %local0 = alloca <vscale x 16 x i1>
+  %local1 = alloca <vscale x 16 x i1>
+  store volatile <vscale x 16 x i1> %v0, <vscale x 16 x i1>* %local0
+  store volatile <vscale x 16 x i1> %v1, <vscale x 16 x i1>* %local1
+  ret void
+}
+
+define void @spill_nxv8i1(<vscale x 8 x i1> %v0, <vscale x 8 x i1> %v1) {
+; CHECK-LABEL: spill_nxv8i1
+; CHECK-DAG: str p{{[01]}}, [sp, #4, mul vl]
+; CHECK-DAG: str p{{[01]}}, [sp]
+  %local0 = alloca <vscale x 8 x i1>
+  %local1 = alloca <vscale x 8 x i1>
+  store volatile <vscale x 8 x i1> %v0, <vscale x 8 x i1>* %local0
+  store volatile <vscale x 8 x i1> %v1, <vscale x 8 x i1>* %local1
+  ret void
+}
+
+define void @spill_nxv4i1(<vscale x 4 x i1> %v0, <vscale x 4 x i1> %v1) {
+; CHECK-LABEL: spill_nxv4i1
+; CHECK-DAG: str p{{[01]}}, [sp, #6, mul vl]
+; CHECK-DAG: str p{{[01]}}, [sp, #4, mul vl]
+  %local0 = alloca <vscale x 4 x i1>
+  %local1 = alloca <vscale x 4 x i1>
+  store volatile <vscale x 4 x i1> %v0, <vscale x 4 x i1>* %local0
+  store volatile <vscale x 4 x i1> %v1, <vscale x 4 x i1>* %local1
+  ret void
+}
+
+define void @spill_nxv2i1(<vscale x 2 x i1> %v0, <vscale x 2 x i1> %v1) {
+; CHECK-LABEL: spill_nxv2i1
+; CHECK-DAG: str p{{[01]}}, [sp, #7, mul vl]
+; CHECK-DAG: str p{{[01]}}, [sp, #6, mul vl]
+  %local0 = alloca <vscale x 2 x i1>
+  %local1 = alloca <vscale x 2 x i1>
+  store volatile <vscale x 2 x i1> %v0, <vscale x 2 x i1>* %local0
+  store volatile <vscale x 2 x i1> %v1, <vscale x 2 x i1>* %local1
+  ret void
+}