diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -931,7 +931,10 @@
   SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
     assert(MulImm.getMinSignedBits() <= VT.getSizeInBits() &&
            "Immediate does not fit VT");
-    return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
+    // MulImm's bit width need not equal VT's; sign-extend or truncate it to
+    // VT's width before building the constant operand.
+    return getNode(ISD::VSCALE, DL, VT,
+                   getConstant(MulImm.sextOrTrunc(VT.getSizeInBits()), DL, VT));
   }
 
   /// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2371,6 +2371,16 @@
     return DAG.getVScale(DL, VT, C0 + C1);
   }
 
+  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
+  if ((N0.getOpcode() == ISD::ADD) &&
+      (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
+      (N1.getOpcode() == ISD::VSCALE)) {
+    const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
+    const APInt &VS1 = N1->getConstantOperandAPInt(0);
+    SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
+    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1537,11 +1537,24 @@
                    LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(),
                    MMOFlags, AAInfo);
 
-  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
-  Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
-  Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
-                   LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
-                   LD->getOriginalAlign(), MMOFlags, AAInfo);
+  unsigned IncrementSize = LoMemVT.getSizeInBits().getKnownMinSize() / 8;
+
+  MachinePointerInfo MPI;
+  if (LoVT.isScalableVector()) {
+    // The high half is addressed as Ptr + vscale * IncrementSize, so no fixed
+    // byte offset is recorded; MPI carries only the address space.
+    SDValue BytesIncrement = DAG.getVScale(
+        dl, Ptr.getValueType(),
+        APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
+    MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, BytesIncrement);
+  } else {
+    MPI = LD->getPointerInfo().getWithOffset(IncrementSize);
+    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
+  }
+
+  Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI,
+                   HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
 
   // Build a factor node to remember that this load is independent of the
   // other one.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4802,6 +4802,9 @@
     if (OpOpcode == ISD::FNEG)  // abs(-X) -> abs(X)
       return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
     break;
+  case ISD::VSCALE:
+    assert(VT == Operand.getValueType() && "Unexpected VT!");
+    break;
   }
 
   SDNode *N;
diff --git a/llvm/test/CodeGen/AArch64/sve-split-load.ll b/llvm/test/CodeGen/AArch64/sve-split-load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-split-load.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+; LOAD
+
+define <vscale x 4 x i16> @load_promote_4i8(<vscale x 4 x i16>* %a) {
+; CHECK-LABEL: load_promote_4i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ret
+  %load = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
+  ret <vscale x 4 x i16> %load
+}
+
+define <vscale x 16 x i16> @load_split_i16(<vscale x 16 x i16>* %a) {
+; CHECK-LABEL: load_split_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ret
+  %load = load <vscale x 16 x i16>, <vscale x 16 x i16>* %a
+  ret <vscale x 16 x i16> %load
+}
+
+define <vscale x 32 x i16> @load_split_32i16(<vscale x 32 x i16>* %a) {
+; CHECK-LABEL: load_split_32i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1h { z2.h }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1h { z3.h }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT:    ret
+  %load = load <vscale x 32 x i16>, <vscale x 32 x i16>* %a
+  ret <vscale x 32 x i16> %load
+}
+
+define <vscale x 16 x i64> @load_split_16i64(<vscale x 16 x i64>* %a) {
+; CHECK-LABEL: load_split_16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1d { z3.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT:    ld1d { z4.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x0, #5, mul vl]
+; CHECK-NEXT:    ld1d { z6.d }, p0/z, [x0, #6, mul vl]
+; CHECK-NEXT:    ld1d { z7.d }, p0/z, [x0, #7, mul vl]
+; CHECK-NEXT:    ret
+  %load = load <vscale x 16 x i64>, <vscale x 16 x i64>* %a
+  ret <vscale x 16 x i64> %load
+}