diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1060,12 +1060,12 @@
                                    Ptr->getType()->getPointerAddressSpace());
     // Vector GEP
     if (Ptr->getType()->isVectorTy()) {
-      unsigned NumElem = Ptr->getType()->getVectorNumElements();
+      ElementCount NumElem = Ptr->getType()->getVectorElementCount();
       return VectorType::get(PtrTy, NumElem);
     }
     for (Value *Index : IdxList)
       if (Index->getType()->isVectorTy()) {
-        unsigned NumElem = Index->getType()->getVectorNumElements();
+        ElementCount NumElem = Index->getType()->getVectorElementCount();
         return VectorType::get(PtrTy, NumElem);
       }
     // Scalar GEP
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5252,7 +5252,8 @@
     // amounts. This catches things like trying to shift an i1024 value by an
     // i8, which is easy to fall into in generic code that uses
     // TLI.getShiftAmountTy().
-    assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
+    assert(N2.getScalarValueSizeInBits() >=
+               Log2_32_Ceil(N1.getScalarValueSizeInBits()) &&
            "Invalid use of small shift amount with oversized value!");

     // Always fold shifts of i1 values so the code generator doesn't need to
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3876,13 +3876,16 @@
   // Normalize Vector GEP - all scalar operands should be converted to the
   // splat vector.
-  unsigned VectorWidth = I.getType()->isVectorTy() ?
-    I.getType()->getVectorNumElements() : 0;
+  ElementCount VectorElementCount = I.getType()->isVectorTy() ?
+    I.getType()->getVectorElementCount() : ElementCount(0, false);

-  if (VectorWidth && !N.getValueType().isVector()) {
+  if (VectorElementCount.Min && !N.getValueType().isVector()) {
     LLVMContext &Context = *DAG.getContext();
-    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
-    N = DAG.getSplatBuildVector(VT, dl, N);
+    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
+    if (VectorElementCount.Scalable)
+      N = DAG.getSplatVector(VT, dl, N);
+    else
+      N = DAG.getSplatBuildVector(VT, dl, N);
   }

   for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
        GTI != E; ++GTI) {
@@ -3906,7 +3909,7 @@
     } else {
       unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
       MVT IdxTy = MVT::getIntegerVT(IdxSize);
-      APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+      TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());

       // If this is a scalar constant or a splat vector of constants,
       // handle it quickly.
@@ -3914,14 +3917,18 @@
       if (C && isa<VectorType>(C->getType()))
         C = C->getSplatValue();

-      if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
-        if (CI->isZero())
-          continue;
+      const auto *CI = dyn_cast_or_null<ConstantInt>(C);
+      if (CI && CI->isZero())
+        continue;
+      if (CI && !ElementSize.isScalable()) {
         APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
         LLVMContext &Context = *DAG.getContext();
-        SDValue OffsVal = VectorWidth ?
-          DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
-          DAG.getConstant(Offs, dl, IdxTy);
+        SDValue OffsVal;
+        if (VectorElementCount.Min)
+          OffsVal = DAG.getConstant(
+              Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
+        else
+          OffsVal = DAG.getConstant(Offs, dl, IdxTy);

         // In an inbounds GEP with an offset that is nonnegative even when
         // interpreted as signed, assume there is no unsigned overflow.
@@ -3938,9 +3945,13 @@
       // N = N + Idx * ElementSize;
       SDValue IdxN = getValue(Idx);

-      if (!IdxN.getValueType().isVector() && VectorWidth) {
-        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
-        IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+      if (!IdxN.getValueType().isVector() && VectorElementCount.Min) {
+        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
+                                  VectorElementCount);
+        if (VectorElementCount.Scalable)
+          IdxN = DAG.getSplatVector(VT, dl, IdxN);
+        else
+          IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
       }

       // If the index is smaller or larger than intptr_t, truncate or extend
@@ -3949,19 +3960,26 @@
       // If this is a multiply by a power of two, turn it into a shl
       // immediately. This is a very common case.
-      if (ElementSize != 1) {
-        if (ElementSize.isPowerOf2()) {
-          unsigned Amt = ElementSize.logBase2();
+      if (ElementSize.getKnownMinSize() != 1) {
+        if (isPowerOf2_64(ElementSize.getKnownMinSize())) {
+          unsigned Amt = Log2_64(ElementSize.getKnownMinSize());
           IdxN = DAG.getNode(ISD::SHL, dl,
                              N.getValueType(), IdxN,
                              DAG.getConstant(Amt, dl, IdxN.getValueType()));
         } else {
-          SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
+          SDValue Scale = DAG.getConstant(ElementSize.getKnownMinSize(), dl,
                                           IdxN.getValueType());
           IdxN = DAG.getNode(ISD::MUL, dl,
                              N.getValueType(), IdxN, Scale);
         }
       }
+      if (ElementSize.isScalable()) {
+        SDValue VScale = DAG.getNode(ISD::VSCALE, dl, IdxTy,
+                                     DAG.getConstant(1, dl, IdxTy));
+        if (VectorElementCount.Min)
+          VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
+        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+      }

       N = DAG.getNode(ISD::ADD, dl,
                       N.getValueType(), N, IdxN);
     }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -985,6 +985,8 @@
   defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
   defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;

+  def : Pat<(nxv2i64 (shl nxv2i64:$Zdn, nxv2i64:$Zm)), (LSL_ZPmZ_D (PTRUE_D 31), $Zdn, $Zm)>;
+
   defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
   defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
   defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: fixed_of_scalable_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    dup v0.2d, x0
+; CHECK-NEXT:    shl v1.2d, v1.2d, #4
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <2 x i64> <i64 1, i64 1>
+  ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_2(<2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: fixed_of_scalable_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    shl v1.2d, v1.2d, #4
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <2 x <vscale x 2 x i64>*> %base, <2 x i64> <i64 1, i64 1>
+  ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
+; CHECK-LABEL: scalable_of_fixed_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_2(<vscale x 2 x i8*> %base) {
+; CHECK-LABEL: scalable_of_fixed_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr i8, <vscale x 2 x i8*> %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_3(i8* %base, <vscale x 2 x i32> %idx) {
+; CHECK-LABEL: scalable_of_fixed_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    add z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %d = getelementptr i8, i8* %base, <vscale x 2 x i32> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: scalable_of_scalable_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w9, #4
+; CHECK-NEXT:    rdvl x10, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.d, x8
+; CHECK-NEXT:    mov z1.d, x9
+; CHECK-NEXT:    lsr x8, x10, #4
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    add z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_2(<vscale x 2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: scalable_of_scalable_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w9, #4
+; CHECK-NEXT:    rdvl x10, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z2.d, x9
+; CHECK-NEXT:    lsr x8, x10, #4
+; CHECK-NEXT:    lsl z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}