diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1854,12 +1854,16 @@
   SmallVector<Value *, 8> Ops(GEP.op_begin(), GEP.op_end());
   Type *GEPType = GEP.getType();
   Type *GEPEltType = GEP.getSourceElementType();
+  bool IsGEPSrcEleScalable =
+      GEPEltType->isVectorTy() && cast<VectorType>(GEPEltType)->isScalable();
   if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
   // For vector geps, use the generic demanded vector support.
-  if (auto *GEPVTy = dyn_cast<VectorType>(GEP.getType())) {
-    auto VWidth = GEPVTy->getNumElements();
+  // Skip if GEP return type is scalable. The number of elements is unknown at
+  // compile-time.
+  if (GEPType->isVectorTy() && !cast<VectorType>(GEPType)->isScalable()) {
+    auto VWidth = cast<VectorType>(GEPType)->getNumElements();
     APInt UndefElts(VWidth, 0);
     APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
     if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
@@ -1896,7 +1900,7 @@
     Type *NewIndexType =
         IndexTy->isVectorTy()
             ? VectorType::get(NewScalarIndexTy,
-                              cast<VectorType>(IndexTy)->getNumElements())
+                              cast<VectorType>(IndexTy)->getElementCount())
             : NewScalarIndexTy;
 
     // If the element type has zero size then any index over it is equivalent
@@ -2146,11 +2150,13 @@
                                        GEP.getName());
   }
 
-  if (GEP.getNumIndices() == 1) {
+  // Skip if GEP source element type is scalable. The type alloc size is unknown
+  // at compile-time.
+  if (GEP.getNumIndices() == 1 && !IsGEPSrcEleScalable) {
     unsigned AS = GEP.getPointerAddressSpace();
     if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
         DL.getIndexSizeInBits(AS)) {
-      uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType);
+      uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
 
       bool Matched = false;
       uint64_t C;
@@ -2263,10 +2269,12 @@
           }
         }
       }
-    } else if (GEP.getNumOperands() == 2) {
-      // Transform things like:
-      //   %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
-      // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
+    } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
+      // Skip if GEP source element type is scalable. The type alloc size is
+      // unknown at compile-time.
+      // Transform things like: %t = getelementptr i32*
+      // bitcast ([2 x i32]* %str to i32*), i32 %V into: %t1 = getelementptr [2
+      // x i32]* %str, i32 0, i32 %V; bitcast
       if (StrippedPtrEltTy->isArrayTy() &&
           DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
               DL.getTypeAllocSize(GEPEltType)) {
@@ -2290,8 +2298,8 @@
       if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
         // Check that changing the type amounts to dividing the index by a scale
         // factor.
-        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
-        uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
+        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
+        uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy).getFixedSize();
         if (ResSize && SrcSize % ResSize == 0) {
           Value *Idx = GEP.getOperand(1);
           unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2330,9 +2338,10 @@
           StrippedPtrEltTy->isArrayTy()) {
         // Check that changing to the array element type amounts to dividing the
         // index by a scale factor.
-        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
+        uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedSize();
         uint64_t ArrayEltSize =
-            DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
+            DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
+                .getFixedSize();
         if (ResSize && ArrayEltSize % ResSize == 0) {
           Value *Idx = GEP.getOperand(1);
           unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2480,7 +2489,9 @@
     if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
       if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
           BasePtrOffset.isNonNegative()) {
-        APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
+        APInt AllocSize(
+            IdxWidth,
+            DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinSize());
         if (BasePtrOffset.ule(AllocSize)) {
           return GetElementPtrInst::CreateInBounds(
               GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
diff --git a/llvm/test/Transforms/InstCombine/vscale_gep.ll b/llvm/test/Transforms/InstCombine/vscale_gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vscale_gep.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; This test is used to verify we are not crashing at Assertion `CastInst::castIsValid(opc, C, Ty) && "Invalid constantexpr cast!"'.
+define <vscale x 2 x i8*> @gep_index_type_is_scalable(i8* %p) {
+; CHECK-LABEL: @gep_index_type_is_scalable(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, i8* [[P:%.*]], <vscale x 2 x i64> undef
+; CHECK-NEXT:    ret <vscale x 2 x i8*> [[GEP]]
+;
+  %gep = getelementptr i8, i8* %p, <vscale x 2 x i64> undef
+  ret <vscale x 2 x i8*> %gep
+}
+
+; This test serves to verify code changes for "GEP.getNumIndices() == 1".
+define <vscale x 4 x i32>* @gep_num_of_indices_1(<vscale x 4 x i32>* %p) {
+; CHECK-LABEL: @gep_num_of_indices_1(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[P:%.*]], i64 1
+; CHECK-NEXT:    ret <vscale x 4 x i32>* [[GEP]]
+;
+  %gep = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %p, i64 1
+  ret <vscale x 4 x i32>* %gep
+}
+
+; This test serves to verify code changes for "GEP.getNumOperands() == 2".
+define void @gep_bitcast(i8* %p) {
+; CHECK-LABEL: @gep_bitcast(
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast i8* [[P:%.*]] to <vscale x 16 x i8>*
+; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* [[CAST]], align 16
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[CAST]], i64 1
+; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* [[GEP2]], align 16
+; CHECK-NEXT:    ret void
+;
+  %cast = bitcast i8* %p to <vscale x 16 x i8>*
+  %gep1 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %cast, i64 0
+  store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %gep1
+  %gep2 = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %cast, i64 1
+  store <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8>* %gep2
+  ret void
+}
+
+; These tests serve to verify code changes when underlying gep ptr is alloca.
+; This test is to verify 'inbounds' is added when it's valid to accumulate constant offset.
+define i32 @gep_alloca_inbounds_vscale_zero() {
+; CHECK-LABEL: @gep_alloca_inbounds_vscale_zero(
+; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds <vscale x 4 x i32>, <vscale x 4 x i32>* [[A]], i64 0, i64 2
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[TMP]], align 8
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %a = alloca <vscale x 4 x i32>
+  %tmp = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i32 0, i32 2
+  %load = load i32, i32* %tmp
+  ret i32 %load
+}
+
+; This test is to verify 'inbounds' is not added when a constant offset can not be determined at compile-time.
+define i32 @gep_alloca_inbounds_vscale_nonzero() {
+; CHECK-LABEL: @gep_alloca_inbounds_vscale_nonzero(
+; CHECK-NEXT:    [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[A]], i64 1, i64 2
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[TMP]], align 8
+; CHECK-NEXT:    ret i32 [[LOAD]]
+;
+  %a = alloca <vscale x 4 x i32>
+  %tmp = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %a, i32 1, i32 2
+  %load = load i32, i32* %tmp
+  ret i32 %load
+}
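Background on the .getFixedSize()/.getKnownMinSize() calls introduced above: DataLayout::getTypeAllocSize() returns a TypeSize rather than a plain integer, and the two accessors behave differently for scalable vectors. The sketch below is illustrative only and is not part of the patch; the helper name allocSizeLowerBound is made up for this example.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/TypeSize.h"
using namespace llvm;

// Illustrative helper (not from the patch): a compile-time lower bound on the
// allocation size of Ty, valid for both fixed and scalable types.
static uint64_t allocSizeLowerBound(const DataLayout &DL, Type *Ty) {
  TypeSize Size = DL.getTypeAllocSize(Ty);
  if (Size.isScalable())
    // A scalable size is a runtime multiple (vscale >= 1) of this minimum,
    // so only a lower bound is known here; getFixedSize() would assert.
    return Size.getKnownMinSize();
  // For fixed-size types the exact allocation size is known at compile time.
  return Size.getFixedSize();
}

This is why the inbounds fold over an alloca can use getKnownMinSize(): an offset no larger than the minimum size is also no larger than the actual runtime allocation, whereas folds that need the exact size are skipped for scalable source element types via IsGEPSrcEleScalable.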