diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3876,13 +3876,17 @@
   // Normalize Vector GEP - all scalar operands should be converted to the
   // splat vector.
-  unsigned VectorWidth = I.getType()->isVectorTy() ?
-    I.getType()->getVectorNumElements() : 0;
+  bool IsVectorGEP = I.getType()->isVectorTy();
+  ElementCount VectorElementCount = IsVectorGEP ?
+    I.getType()->getVectorElementCount() : ElementCount(0, false);
 
-  if (VectorWidth && !N.getValueType().isVector()) {
+  if (IsVectorGEP && !N.getValueType().isVector()) {
     LLVMContext &Context = *DAG.getContext();
-    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
-    N = DAG.getSplatBuildVector(VT, dl, N);
+    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
+    if (VectorElementCount.Scalable)
+      N = DAG.getSplatVector(VT, dl, N);
+    else
+      N = DAG.getSplatBuildVector(VT, dl, N);
   }
 
   for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
        GTI != E; ++GTI) {
@@ -3904,9 +3908,16 @@
                         DAG.getConstant(Offset, dl, N.getValueType()), Flags);
       }
     } else {
+      // IdxSize is the width of the arithmetic according to IR semantics.
+      // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
+      // (and fix up the result later).
       unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
       MVT IdxTy = MVT::getIntegerVT(IdxSize);
-      APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+      TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+      // We intentionally mask away the high bits here; ElementSize may not
+      // fit in IdxTy.
+      APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
+      bool ElementScalable = ElementSize.isScalable();
 
       // If this is a scalar constant or a splat vector of constants,
       // handle it quickly.
@@ -3914,14 +3925,18 @@
       if (C && isa<VectorType>(C->getType()))
         C = C->getSplatValue();
 
-      if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
-        if (CI->isZero())
-          continue;
-        APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
+      const auto *CI = dyn_cast_or_null<ConstantInt>(C);
+      if (CI && CI->isZero())
+        continue;
+      if (CI && !ElementScalable) {
+        APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
         LLVMContext &Context = *DAG.getContext();
-        SDValue OffsVal = VectorWidth ?
-          DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
-          DAG.getConstant(Offs, dl, IdxTy);
+        SDValue OffsVal;
+        if (IsVectorGEP)
+          OffsVal = DAG.getConstant(
+              Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
+        else
+          OffsVal = DAG.getConstant(Offs, dl, IdxTy);
 
         // In an inbounds GEP with an offset that is nonnegative even when
         // interpreted as signed, assume there is no unsigned overflow.
@@ -3935,31 +3950,45 @@
         continue;
       }
 
-      // N = N + Idx * ElementSize;
+      // N = N + Idx * ElementMul;
       SDValue IdxN = getValue(Idx);
 
-      if (!IdxN.getValueType().isVector() && VectorWidth) {
-        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
-        IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+      if (!IdxN.getValueType().isVector() && IsVectorGEP) {
+        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
+                                  VectorElementCount);
+        if (VectorElementCount.Scalable)
+          IdxN = DAG.getSplatVector(VT, dl, IdxN);
+        else
+          IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
       }
 
       // If the index is smaller or larger than intptr_t, truncate or extend
       // it.
       IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
 
-      // If this is a multiply by a power of two, turn it into a shl
-      // immediately.  This is a very common case.
-      if (ElementSize != 1) {
-        if (ElementSize.isPowerOf2()) {
-          unsigned Amt = ElementSize.logBase2();
-          IdxN = DAG.getNode(ISD::SHL, dl,
-                             N.getValueType(), IdxN,
-                             DAG.getConstant(Amt, dl, IdxN.getValueType()));
-        } else {
-          SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
-                                          IdxN.getValueType());
-          IdxN = DAG.getNode(ISD::MUL, dl,
-                             N.getValueType(), IdxN, Scale);
+      if (ElementScalable) {
+        EVT VScaleTy = N.getValueType().getScalarType();
+        SDValue VScale = DAG.getNode(
+            ISD::VSCALE, dl, VScaleTy,
+            DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
+        if (IsVectorGEP)
+          VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
+        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+      } else {
+        // If this is a multiply by a power of two, turn it into a shl
+        // immediately.  This is a very common case.
+        if (ElementMul != 1) {
+          if (ElementMul.isPowerOf2()) {
+            unsigned Amt = ElementMul.logBase2();
+            IdxN = DAG.getNode(ISD::SHL, dl,
+                               N.getValueType(), IdxN,
+                               DAG.getConstant(Amt, dl, IdxN.getValueType()));
+          } else {
+            SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
+                                            IdxN.getValueType());
+            IdxN = DAG.getNode(ISD::MUL, dl,
+                               N.getValueType(), IdxN, Scale);
+          }
         }
       }
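The crux of the change is the new ElementScalable branch in the last hunk: for a scalable element type, DL->getTypeAllocSize returns a TypeSize whose known-minimum value still has to be multiplied by the runtime vscale, so the byte offset contributed by one GEP index becomes Idx * (KnownMinSize * vscale), built from an ISD::VSCALE node rather than a plain constant. As a sanity check, here is a minimal standalone model of that arithmetic; the names (ModelTypeSize, gepIndexOffset) are made up for illustration, and this is a sketch of the semantics, not LLVM's API:

// Standalone model of the offset math emitted by visitGetElementPtr above.
#include <cassert>
#include <cstdint>

struct ModelTypeSize {
  uint64_t KnownMin; // cf. TypeSize::getKnownMinSize()
  bool Scalable;     // cf. TypeSize::isScalable()
};

// Byte offset contributed by one GEP index: Idx * stride. For a scalable
// type the stride is KnownMin * vscale (the ISD::VSCALE + ISD::MUL path);
// for a fixed type it is KnownMin (the existing SHL/MUL fast path).
int64_t gepIndexOffset(int64_t Idx, ModelTypeSize Size, uint64_t VScale) {
  uint64_t Stride = Size.Scalable ? Size.KnownMin * VScale : Size.KnownMin;
  return Idx * static_cast<int64_t>(Stride);
}

int main() {
  // <vscale x 2 x i64> has known-minimum size 16 bytes, so with 256-bit SVE
  // registers (vscale == 2) GEP index 4 advances 4 * 16 * 2 = 128 bytes,
  // which is what "rdvl x8, #1; add x0, x0, x8, lsl #2" computes below.
  assert(gepIndexOffset(4, {16, true}, 2) == 128);
  // A fixed 16-byte element ignores vscale: 4 * 16 = 64 bytes.
  assert(gepIndexOffset(4, {16, false}, 2) == 64);
}

Note that the constant fast path is deliberately gated on !ElementScalable: a scalable offset can never be folded to a plain constant, since it depends on vscale.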
diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 2 x i64>* @scalar_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: scalar_of_scalable_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    add x0, x0, x8, lsl #2
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 4
+  ret <vscale x 2 x i64>* %d
+}
+
+define <vscale x 2 x i64>* @scalar_of_scalable_2(<vscale x 2 x i64>* %base, i64 %offset) {
+; CHECK-LABEL: scalar_of_scalable_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    madd x0, x1, x8, x0
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 %offset
+  ret <vscale x 2 x i64>* %d
+}
+
+define <vscale x 2 x i32>* @scalar_of_scalable_3(<vscale x 2 x i32>* %base, i64 %offset) {
+; CHECK-LABEL: scalar_of_scalable_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnth x8
+; CHECK-NEXT:    madd x0, x1, x8, x0
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base, i64 %offset
+  ret <vscale x 2 x i32>* %d
+}
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: fixed_of_scalable_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    dup v1.2d, x0
+; CHECK-NEXT:    add v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <2 x i64> <i64 1, i64 1>
+  ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_2(<2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: fixed_of_scalable_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <2 x <vscale x 2 x i64>*> %base, <2 x i64> <i64 1, i64 1>
+  ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
+; CHECK-LABEL: scalable_of_fixed_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_2(<vscale x 2 x i8*> %base) {
+; CHECK-LABEL: scalable_of_fixed_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr i8, <vscale x 2 x i8*> %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_3(i8* %base) {
+; CHECK-LABEL: scalable_of_fixed_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_4(i8* %base, <vscale x 2 x i32> %idx) {
+; CHECK-LABEL: scalable_of_fixed_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    add z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %d = getelementptr i8, i8* %base, <vscale x 2 x i32> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: scalable_of_scalable_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    mul z1.d, z1.d, #1
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_2(<vscale x 2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: scalable_of_scalable_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mul z1.d, z1.d, #1
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_3(<vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i32> %idx) {
+; CHECK-LABEL: scalable_of_scalable_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i32> %idx
+  ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
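A note on reading the CHECK lines above: rdvl x8, #1 materializes the SVE vector length in bytes (vscale * 16, the allocation size of <vscale x 2 x i64>), cnth materializes the halfword element count (vscale * 8, matching the allocation size of <vscale x 2 x i32>), and the sxtw in scalable_of_fixed_4 and scalable_of_scalable_3 is the per-lane sign extension produced by the getSExtOrTrunc call for <vscale x 2 x i32> indices. A small standalone sketch of those quantities, with hypothetical helper functions standing in for the instructions:

// Models of the SVE length-reading instructions and the index extension;
// illustrative helpers only, not an emulator.
#include <cassert>
#include <cstdint>

uint64_t rdvl(uint64_t Imm, uint64_t VScale) { return Imm * VScale * 16; }
uint64_t cnth(uint64_t VScale) { return VScale * 8; } // 16-bit lane count
int64_t sxtw(int32_t Idx) { return static_cast<int64_t>(Idx); }

int main() {
  const uint64_t VScale = 2; // e.g. 256-bit SVE registers
  // scalar_of_scalable_1: "add x0, x0, x8, lsl #2" with x8 = rdvl #1 adds
  // 4 * sizeof(<vscale x 2 x i64>) to the base pointer.
  assert(rdvl(1, VScale) << 2 == 4 * (VScale * 16));
  // scalar_of_scalable_3: sizeof(<vscale x 2 x i32>) == vscale * 8 == cnth,
  // so "madd x0, x1, x8, x0" computes base + %offset * cnth.
  assert(cnth(VScale) == VScale * 8);
  // scalable_of_fixed_4: a negative 32-bit index stays negative after sxtw,
  // mirroring the signed semantics of GEP indices.
  assert(sxtw(-4) == -4);
}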