diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1060,12 +1060,12 @@
                                    Ptr->getType()->getPointerAddressSpace());
     // Vector GEP
     if (Ptr->getType()->isVectorTy()) {
-      unsigned NumElem = Ptr->getType()->getVectorNumElements();
+      ElementCount NumElem = Ptr->getType()->getVectorElementCount();
       return VectorType::get(PtrTy, NumElem);
     }
     for (Value *Index : IdxList)
       if (Index->getType()->isVectorTy()) {
-        unsigned NumElem = Index->getType()->getVectorNumElements();
+        ElementCount NumElem = Index->getType()->getVectorElementCount();
         return VectorType::get(PtrTy, NumElem);
       }
     // Scalar GEP
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5252,7 +5252,8 @@
     // amounts. This catches things like trying to shift an i1024 value by an
     // i8, which is easy to fall into in generic code that uses
     // TLI.getShiftAmount().
-    assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
+    assert(N2.getScalarValueSizeInBits() >=
+               Log2_32_Ceil(N1.getScalarValueSizeInBits()) &&
            "Invalid use of small shift amount with oversized value!");
 
     // Always fold shifts of i1 values so the code generator doesn't need to
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3876,13 +3876,17 @@
   // Normalize Vector GEP - all scalar operands should be converted to the
   // splat vector.
-  unsigned VectorWidth = I.getType()->isVectorTy() ?
-    I.getType()->getVectorNumElements() : 0;
+  bool IsVectorGEP = I.getType()->isVectorTy();
+  ElementCount VectorElementCount = IsVectorGEP ?
+    I.getType()->getVectorElementCount() : ElementCount(0, false);
 
-  if (VectorWidth && !N.getValueType().isVector()) {
+  if (IsVectorGEP && !N.getValueType().isVector()) {
     LLVMContext &Context = *DAG.getContext();
-    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
-    N = DAG.getSplatBuildVector(VT, dl, N);
+    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
+    if (VectorElementCount.Scalable)
+      N = DAG.getSplatVector(VT, dl, N);
+    else
+      N = DAG.getSplatBuildVector(VT, dl, N);
   }
 
   for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
        GTI != E; ++GTI) {
@@ -3904,9 +3908,16 @@
         N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
                         DAG.getConstant(Offset, dl, N.getValueType()), Flags);
       }
     } else {
+      // IdxSize is the width of the arithmetic according to IR semantics.
+      // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
+      // (and fix up the result later).
      unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
      MVT IdxTy = MVT::getIntegerVT(IdxSize);
-      APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+      TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+      // We intentionally mask away the high bits here; ElementSize may not
+      // fit in IdxTy.
+      APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
+      bool ElementScalable = ElementSize.isScalable();
       // If this is a scalar constant or a splat vector of constants,
       // handle it quickly.
@@ -3914,14 +3925,18 @@
       if (C && isa<VectorType>(C->getType()))
         C = C->getSplatValue();
 
-      if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
-        if (CI->isZero())
-          continue;
-        APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
+      const auto *CI = dyn_cast_or_null<ConstantInt>(C);
+      if (CI && CI->isZero())
+        continue;
+      if (CI && !ElementScalable) {
+        APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
         LLVMContext &Context = *DAG.getContext();
-        SDValue OffsVal = VectorWidth ?
-          DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
-          DAG.getConstant(Offs, dl, IdxTy);
+        SDValue OffsVal;
+        if (IsVectorGEP)
+          OffsVal = DAG.getConstant(
+              Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
+        else
+          OffsVal = DAG.getConstant(Offs, dl, IdxTy);
 
         // In an inbounds GEP with an offset that is nonnegative even when
         // interpreted as signed, assume there is no unsigned overflow.
@@ -3935,12 +3950,16 @@
         continue;
       }
 
-      // N = N + Idx * ElementSize;
+      // N = N + Idx * ElementMul;
       SDValue IdxN = getValue(Idx);
 
-      if (!IdxN.getValueType().isVector() && VectorWidth) {
-        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
-        IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+      if (!IdxN.getValueType().isVector() && IsVectorGEP) {
+        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
+                                  VectorElementCount);
+        if (VectorElementCount.Scalable)
+          IdxN = DAG.getSplatVector(VT, dl, IdxN);
+        else
+          IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
       }
 
       // If the index is smaller or larger than intptr_t, truncate or extend
@@ -3949,19 +3968,27 @@
 
       // If this is a multiply by a power of two, turn it into a shl
       // immediately. This is a very common case.
-      if (ElementSize != 1) {
-        if (ElementSize.isPowerOf2()) {
-          unsigned Amt = ElementSize.logBase2();
+      if (ElementMul != 1) {
+        if (ElementMul.isPowerOf2()) {
+          unsigned Amt = ElementMul.logBase2();
           IdxN = DAG.getNode(ISD::SHL, dl,
                              N.getValueType(), IdxN,
                              DAG.getConstant(Amt, dl, IdxN.getValueType()));
         } else {
-          SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
+          SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
                                           IdxN.getValueType());
           IdxN = DAG.getNode(ISD::MUL, dl,
                              N.getValueType(), IdxN, Scale);
         }
       }
+      if (ElementScalable) {
+        EVT VScaleTy = N.getValueType().getScalarType();
+        SDValue VScale = DAG.getNode(
+            ISD::VSCALE, dl, VScaleTy, DAG.getConstant(1, dl, VScaleTy));
+        if (IsVectorGEP)
+          VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
+        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+      }
 
       N = DAG.getNode(ISD::ADD, dl,
                       N.getValueType(), N, IdxN);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -985,6 +985,8 @@
   defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
   defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;
 
+  def : Pat<(nxv2i64 (shl nxv2i64:$Zdn, nxv2i64:$Zm)), (LSL_ZPmZ_D (PTRUE_D 31), $Zdn, $Zm)>;
+
   defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
   defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
   defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: fixed_of_scalable_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    dup v0.2d, x0
+; CHECK-NEXT:    shl v1.2d, v1.2d, #4
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <2 x i64> <i64 1, i64 1>
+  ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_2(<2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: fixed_of_scalable_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    shl v1.2d, v1.2d, #4
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <2 x <vscale x 2 x i64>*> %base, <2 x i64> <i64 1, i64 1>
+  ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
+; CHECK-LABEL: scalable_of_fixed_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_2(<vscale x 2 x i8*> %base) {
+; CHECK-LABEL: scalable_of_fixed_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr i8, <vscale x 2 x i8*> %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_3(i8* %base, <vscale x 2 x i32> %idx) {
+; CHECK-LABEL: scalable_of_fixed_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    add z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %d = getelementptr i8, i8* %base, <vscale x 2 x i32> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: scalable_of_scalable_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w9, #4
+; CHECK-NEXT:    rdvl x10, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.d, x8
+; CHECK-NEXT:    mov z1.d, x9
+; CHECK-NEXT:    lsr x8, x10, #4
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    add z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_2(<vscale x 2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: scalable_of_scalable_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w9, #4
+; CHECK-NEXT:    rdvl x10, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z2.d, x9
+; CHECK-NEXT:    lsr x8, x10, #4
+; CHECK-NEXT:    lsl z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
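A brief usage sketch, not taken from the patch itself: with the SelectionDAGBuilder change above, a GEP whose source element type is scalable is lowered as base + index * (known-minimum element size * vscale), with vscale coming from the new ISD::VSCALE node; that is the rdvl/lsr/shl arithmetic visible in the CHECK lines of sve-gep.ll. The IR below is a hypothetical extra case in the same style (the function name and the <vscale x 4 x i32> element type are illustrative assumptions, not part of the test file); it can be fed to the same command as the RUN line above, llc -mtriple=aarch64-linux-gnu -mattr=+sve.

; Hypothetical example: advance a pointer to a scalable vector by a scalar
; index. <vscale x 4 x i32> has a known-minimum allocation size of 16 bytes,
; so the offset is expected to be computed as %idx * (16 * vscale).
define <vscale x 4 x i32>* @hypothetical_scalable_gep(<vscale x 4 x i32>* %base, i64 %idx) {
  %d = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 %idx
  ret <vscale x 4 x i32>* %d
}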