diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -754,6 +754,13 @@
   ///
   /// If the multiplication is known not to overflow then NoSignedWrap is set.
   Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
+
+  /// Simplify a GEP chain when some of the GEP instructions have constant indices.
+  ///
+  /// For example:
+  ///   GEP i8 (GEP (GEP i64 X, C1), I1), C2 = GEP i8 (GEP X, I1), C3 where
+  ///   C3 = C1 * 8 + C2 for constants C1 and C2.
+  Value *SimplifyGEPChain(GetElementPtrInst &GEP);
 };
 
 class Negator final {
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2251,6 +2251,231 @@
   return nullptr;
 }
 
+// Clone a GEP instruction with a new pointer operand and an updated index,
+// given a constant value to be added to, or appended after, the last index of
+// the original GEP.
+static Value *CreateInBoundsGEPUpdateIndex(InstCombiner::BuilderTy &Builder,
+                                           GetElementPtrInst *GEP,
+                                           Value *NewPointer, APInt Diff,
+                                           bool AppendIndex) {
+  SmallVector<Value *> NewIndices(GEP->indices());
+  if (AppendIndex) {
+    NewIndices.push_back(ConstantInt::get(
+        Type::getIntNTy(Builder.getContext(), Diff.getBitWidth()), Diff));
+  } else {
+    APInt Idx =
+        cast<ConstantInt>((GEP->indices().end() - 1)->get())->getValue();
+    // Make sure NewIdx is big enough to hold the sum of Idx and Diff.
+    unsigned BitWidth = std::max(Idx.getBitWidth(), Diff.getBitWidth());
+    Idx = Idx.sextOrSelf(BitWidth);
+    Diff = Diff.sextOrSelf(BitWidth);
+    bool Overflow = false;
+    APInt Res = Idx.sadd_ov(Diff, Overflow);
+    if (Overflow) {
+      BitWidth++;
+      Idx = Idx.sext(BitWidth);
+      Diff = Diff.sext(BitWidth);
+      Res = Idx + Diff;
+    }
+    Constant *NewIdx =
+        ConstantInt::get(Type::getIntNTy(Builder.getContext(), BitWidth), Res);
+    NewIndices.back() = NewIdx;
+  }
+  return Builder.CreateInBoundsGEP(GEP->getSourceElementType(), NewPointer,
+                                   NewIndices);
+}
+
+// Recursively rewrite the GEP chain on top of the replaced inner GEP, and
+// return the new value that replaces the outer GEP.
+static Value *RewriteGEPChain(InstCombiner::BuilderTy &Builder, Value *V,
+                              Value *Match, Value *NewV) {
+  if (V == Match)
+    return NewV;
+
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(V)) {
+    return Builder.CreateBitCast(
+        RewriteGEPChain(Builder, BC->getOperand(0), Match, NewV),
+        BC->getDestTy());
+  } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+    return Builder.CreateInBoundsGEP(
+        GEP->getSourceElementType(),
+        RewriteGEPChain(Builder, GEP->getPointerOperand(), Match, NewV),
+        SmallVector<Value *>(GEP->indices()));
+  } else {
+    llvm_unreachable("Unsupported type");
+  }
+}
+
+/// If the outer (most dominated) and the inner (most dominating) GEP
+/// instructions in a chain of GEP (and bitcast) instructions have constant
+/// indices, the intermediate values have no other uses, and the intermediate
+/// indices do not depend on the inner GEP, then the two instructions can be
+/// merged with recalculated constant indices, provided the constant offset of
+/// one GEP is divisible by the scalar element size of the other.
+Value *InstCombinerImpl::SimplifyGEPChain(GetElementPtrInst &OuterGEP) {
+  auto IsGEPNonZeroConstantIndex = [&](GetElementPtrInst *GEP) {
+    // Check that each index is a constant int, and that not all indices are
+    // zero. An all-zero GEP is the same as a no-cost bitcast, so we should not
+    // turn it into pointer arithmetic.
+    bool NonZero = false;
+    for (Value *Idx : GEP->indices()) {
+      ConstantInt *CI;
+      if (!match(Idx, m_ConstantInt(CI)))
+        return false;
+      if (!CI->isZero())
+        NonZero = true;
+    }
+
+    // Check that the result has a fixed, non-zero size.
+    return NonZero && !isa<ScalableVectorType>(GEP->getResultElementType()) &&
+           DL.getTypeAllocSize(GEP->getResultElementType()) != 0;
+  };
+
+  // Check whether the second-to-last GEP index points into an array, so that
+  // we can perform pointer arithmetic by changing the last index.
+  auto IsPointerToArray = [](GetElementPtrInst *GEP) {
+    // A GEP with only one index behaves as if it points into an array.
+    if (GEP->getNumIndices() == 1)
+      return true;
+    return isa<ArrayType>(GetElementPtrInst::getIndexedType(
+        GEP->getSourceElementType(),
+        SmallVector<Value *>(llvm::drop_end(GEP->indices()))));
+  };
+
+  // Bail out unless OuterGEP is inbounds and has non-zero constant indices.
+  if (!OuterGEP.isInBounds() || OuterGEP.getNumIndices() == 0 ||
+      !IsGEPNonZeroConstantIndex(&OuterGEP))
+    return nullptr;
+
+  Value *V = OuterGEP.getOperand(0);
+  Value *NewV = nullptr;
+  bool AppendIndex = false;
+  APInt Diff;
+
+  unsigned Depth = 0;
+  while (Depth++ < MaxAnalysisRecursionDepth) {
+    // Except for the inner GEP, all intermediate instructions must have
+    // exactly one use, so that rewriting does not increase the instruction
+    // count. This also implies that no intermediate GEP's indices depend on
+    // another intermediate GEP; otherwise the transformation would be invalid.
+    bool hasOneUse = V->hasOneUse();
+
+    if (BitCastInst *BC = dyn_cast<BitCastInst>(V)) {
+      // A bitcast does not change the address value; keep searching into it.
+      if (!hasOneUse)
+        return nullptr;
+      V = BC->getOperand(0);
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+      // Can't guarantee pointer arithmetic is valid in these cases.
+      if (!GEP->isInBounds() ||
+          OuterGEP.getAddressSpace() != GEP->getAddressSpace())
+        return nullptr;
+
+      const size_t BitWidth =
+          DL.getIndexSizeInBits(OuterGEP.getAddressSpace());
+      APInt OuterOffset(BitWidth, 0, true);
+      APInt InnerOffset(BitWidth, 0, true);
+
+      // Found an inner GEP candidate with constant indices.
+      if (GEP->getNumIndices() > 0 && IsGEPNonZeroConstantIndex(GEP) &&
+          OuterGEP.accumulateConstantOffset(DL, OuterOffset) &&
+          GEP->accumulateConstantOffset(DL, InnerOffset)) {
+
+        Builder.SetInsertPoint(&OuterGEP);
+
+        // If the two offsets cancel each other out, we can eliminate both GEP
+        // instructions.
+        if (InnerOffset + OuterOffset == 0) {
+          NewV = GEP->getPointerOperand();
+          if (GEP->getPointerOperandType() != GEP->getType())
+            NewV = Builder.CreateBitCast(NewV, GEP->getType());
+          break;
+        }
+
+        APInt InnerSize(BitWidth,
+                        DL.getTypeAllocSize(GEP->getResultElementType()));
+
+        // If the outer GEP offset is divisible by the inner GEP element size,
+        // and the last index of the inner GEP is not a struct index, merge
+        // the outer GEP into the inner GEP by adding the quotient to its last
+        // index.
+        if (OuterOffset.srem(InnerSize) == 0 && IsPointerToArray(GEP)) {
+          NewV = CreateInBoundsGEPUpdateIndex(
+              Builder, GEP, GEP->getPointerOperand(),
+              OuterOffset.sdiv(InnerSize), false);
+          break;
+        }
+
+        // If the inner GEP's result points to an array and the outer GEP
+        // offset is divisible by the array's element size, merge the outer
+        // GEP into the inner GEP by appending an index. Note that while we
+        // could index through every nested array or struct in an attempt to
+        // find a size the offset is divisible by, doing so would be costly,
+        // so we only search one level down.
+        if (GEP->getResultElementType()->isArrayTy()) {
+          InnerSize = DL.getTypeAllocSize(
+              GEP->getResultElementType()->getArrayElementType());
+          if (OuterOffset.srem(InnerSize) == 0) {
+            NewV = CreateInBoundsGEPUpdateIndex(
+                Builder, GEP, GEP->getPointerOperand(),
+                OuterOffset.sdiv(InnerSize), true);
+            break;
+          }
+        }
+
+        // Check the other direction: if the inner GEP offset is divisible by
+        // the outer GEP element size, eliminate the inner GEP and let the
+        // outer GEP absorb it.
+        APInt OuterSize(BitWidth,
+                        DL.getTypeAllocSize(OuterGEP.getResultElementType()));
+        if (InnerOffset.srem(OuterSize) == 0 && IsPointerToArray(&OuterGEP)) {
+          Diff = InnerOffset.sdiv(OuterSize);
+          NewV = GEP->getPointerOperand();
+          if (GEP->getPointerOperandType() != GEP->getType())
+            NewV = Builder.CreateBitCast(NewV, GEP->getType());
+          break;
+        }
+
+        // Similarly, handle the case where the outer GEP's result points to
+        // an array.
+        if (OuterGEP.getResultElementType()->isArrayTy()) {
+          OuterSize = DL.getTypeAllocSize(
+              OuterGEP.getResultElementType()->getArrayElementType());
+          if (InnerOffset.srem(OuterSize) == 0) {
+            AppendIndex = true;
+            Diff = InnerOffset.sdiv(OuterSize);
+            NewV = GEP->getPointerOperand();
+            if (GEP->getPointerOperandType() != GEP->getType())
+              NewV = Builder.CreateBitCast(NewV, GEP->getType());
+            break;
+          }
+        }
+
+        // Otherwise this GEP is not mergeable; treat it as a regular GEP.
+      }
+
+      // Regular GEP, keep searching into it.
+      if (!hasOneUse)
+        return nullptr;
+      V = GEP->getPointerOperand();
+    } else {
+      return nullptr;
+    }
+  }
+
+  if (NewV) {
+    NewV = RewriteGEPChain(Builder, OuterGEP.getPointerOperand(), V, NewV);
+
+    // Handle merging the inner GEP into the outer GEP.
+    if (Diff != 0)
+      return CreateInBoundsGEPUpdateIndex(Builder, &OuterGEP, NewV, Diff,
+                                          AppendIndex);
+    // Handle the outer GEP being eliminated (merged into the inner GEP, or
+    // cancelled out).
+    if (OuterGEP.getType() != NewV->getType())
+      return Builder.CreateBitCast(NewV, OuterGEP.getType());
+    return NewV;
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
   SmallVector<Value *, 8> Indices(GEP.indices());
@@ -2693,6 +2918,9 @@
     }
   }
 
+  if (Value *V = SimplifyGEPChain(GEP))
+    return replaceInstUsesWith(GEP, V);
+
   if (Instruction *R = foldSelectGEP(GEP, Builder))
     return R;
 
diff --git a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
--- a/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
+++ b/llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll
@@ -11,10 +11,9 @@
 ; result = (p + 3) + a
 define i32* @basic(i32* %p, i64 %a) {
 ; CHECK-LABEL: @basic(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, i32* %p, i64 1
   %2 = getelementptr inbounds i32, i32* %1, i64 %a
@@ -25,9 +24,8 @@
 ; result = (struct.C*) p + 3
 define %struct.C* @offsetDivisible(i64* %p) {
 ; CHECK-LABEL: @offsetDivisible(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[TMP1]], i64 1
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 3
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, i64* %p, i64 3
   %2 = getelementptr inbounds %struct.C, %struct.C* %1, i64 1
@@ -37,10 +35,9 @@
 ; result = (i8*) ((i16*) p + 7) + a
 define ptr @opaque(ptr %p, i64 %a) {
 ; CHECK-LABEL: @opaque(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 -1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 7
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i16, ptr %p, i64 -1
   %2 = getelementptr inbounds i8, ptr %1, i64 %a
@@ -51,10 +48,9 @@
 ; result = (i32*) (p - 9) + a
 define i32* @bitcast(i8* %p, i64 %a) {
 ; CHECK-LABEL: @bitcast(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -9
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i8, i8* %p, i64 -1
   %2 = bitcast i8* %1 to i32*
@@ -67,9 +63,8 @@
 ; result = (i8*) p + 10
 define i8* @bitcastReverse(i64* %p, i64 %a) {
 ; CHECK-LABEL: @bitcastReverse(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 10
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i64, i64* %p, i64 1
   %2 = bitcast i64* %1 to i8*
@@ -80,13 +75,11 @@
 ; result = (i16*) ((i64*) ((i8*) p + a) + (a * b)) + 29
 define i16* @nested(i32* %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @nested(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[A]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i64 7
-; CHECK-NEXT:    ret ptr [[TMP6]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 29
+; CHECK-NEXT:    ret ptr [[TMP4]]
 ;
   %1 = getelementptr inbounds i32, i32* %p, i64 3
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -103,10 +96,8 @@
 ; result = (i8*) p + a
 define i8* @zeroSum(i32 *%p, i64 %a) {
 ; CHECK-LABEL: @zeroSum(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -4
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    ret ptr [[TMP1]]
 ;
   %1 = getelementptr inbounds i32, i32* %p, i64 1
   %2 = bitcast i32* %1 to i8*
@@ -119,10 +110,9 @@
 ; result = (p + 254) + a
 define i32* @indexOverflow(i32* %p, i64 %a) {
 ; CHECK-LABEL: @indexOverflow(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 127
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 254
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 127
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, i32* %p, i8 127
   %2 = getelementptr inbounds i32, i32* %1, i64 %a
@@ -138,8 +128,8 @@
 ; CHECK-LABEL: @constIndexGEPMultipleUses(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 3
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP6]]
@@ -157,10 +147,9 @@
 ; result = (i32*) ((p[1] + 17) + a)
 define i32* @array1([20 x i8]* %p, i64 %a) {
 ; CHECK-LABEL: @array1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [20 x i8], ptr [[P:%.*]], i64 1, i64 17
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds [20 x i8], [20 x i8]* %p, i64 1, i64 1
  %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -171,10 +160,9 @@
 ; result = (i8*) ((i32*) p + a) + 14
 define i8* @array2([7 x i32]* %p, i64 %a) {
 ; CHECK-LABEL: @array2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [7 x i32], ptr [[P:%.*]], i64 0, i64 3
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 2
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 14
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds [7 x i32], [7 x i32]* %p, i64 0, i64 3
   %2 = getelementptr inbounds i32, i32* %1, i64 %a
@@ -185,10 +173,9 @@
 ; result = (([3 x i8]*) (p + a))[1] + 17
 define i8* @array3(i64* %p, i64 %a) {
 ; CHECK-LABEL: @array3(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [3 x i8], ptr [[TMP2]], i64 1, i64 1
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [3 x i8], ptr [[TMP1]], i64 1, i64 17
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i64, i64* %p, i64 2
   %2 = getelementptr inbounds i64, i64* %1, i64 %a
@@ -199,10 +186,9 @@
 ; result = ((i8*) p + a) - 4
 define i8* @struct(%struct.A* %p, i64 %a) {
 ; CHECK-LABEL: @struct(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[P:%.*]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -128
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -4
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds %struct.A, %struct.A* %p, i64 0, i32 1
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -213,10 +199,9 @@
 ; result = (i32*) ((p - 4) + a)
 define i32* @structReverse(i8* %p, i64 %a) {
 ; CHECK-LABEL: @structReverse(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -128
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[TMP2]], i64 0, i32 1
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i8, i8* %p, i64 -128
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -227,10 +212,9 @@
 ; result = ((i8*) &p[0].member2.member0 + 7) + a
 define i8* @structStruct(%struct.B* %p, i64 %a) {
 ; CHECK-LABEL: @structStruct(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 3
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 2, i32 0, i64 7
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_A:%.*]], ptr [[TMP2]], i64 0, i32 0, i64 4
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 2, i32 0, i64 3
  %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -244,10 +228,9 @@
 ; result = (i8*) ((i16*) &p[0].member1 + 4) + a
 define i64* @appendIndex(%struct.B* %p, i64 %a) {
 ; CHECK-LABEL: @appendIndex(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[P:%.*]], i64 0, i32 1, i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 1
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 1
   %2 = getelementptr inbounds i8, i8* %1, i64 %a
@@ -258,10 +241,9 @@
 ; result = (i16*) &((struct.B*) (p + a))[0].member1 + 4
 define ptr @appendIndexReverse(i64* %p, i64 %a) {
 ; CHECK-LABEL: @appendIndexReverse(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 [[A:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP2]], i64 0, i32 1
-; CHECK-NEXT:    ret ptr [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[P:%.*]], i64 [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_B:%.*]], ptr [[TMP1]], i64 0, i32 1, i64 4
+; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i64, i64* %p, i64 1
   %2 = getelementptr inbounds i64, i64* %1, i64 %a
@@ -274,12 +256,11 @@
 ; result = (struct.C*) ((<5 x i32>*) ((p + 4) + a) + 1) + b
 define ptr @skipMiddleGEP(%struct.C* %p, i64 %a, i64 %b) {
 ; CHECK-LABEL: @skipMiddleGEP(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_C:%.*]], ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_C]], ptr [[TMP1]], i64 [[A:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds <5 x i32>, ptr [[TMP2]], i64 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_C]], ptr [[TMP3]], i64 [[B:%.*]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_C]], ptr [[TMP4]], i64 3
-; CHECK-NEXT:    ret ptr [[TMP5]]
+; CHECK-NEXT:    ret ptr [[TMP4]]
 ;
   %1 = getelementptr inbounds %struct.C, %struct.C* %p, i64 1
   %2 = getelementptr inbounds %struct.C, %struct.C* %1, i64 %a
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -44,9 +44,8 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP4]], align 8, !alias.scope !0
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -4
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 -3
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>*
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 -7
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[TMP5]] to <4 x double>*
 ; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !alias.scope !0
 ; CHECK-NEXT:    [[REVERSE7:%.*]] = shufflevector <4 x double> [[WIDE_LOAD6]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP8:%.*]] = fcmp une <4 x double> [[REVERSE]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -762,11 +762,9 @@
 ; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[NEXT_GEP]], i64 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC5]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[VEC_IND]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 -2
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <12 x i32>*
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[NEXT_GEP]] to <12 x i32>*
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> [[TMP9]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
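
Summary note (illustrative, not part of the patch): the @basic test above captures the core rewrite. Given a chain in which the innermost and outermost GEPs have constant indices and the middle GEP has a variable index,

  %1 = getelementptr inbounds i32, i32* %p, i64 1
  %2 = getelementptr inbounds i32, i32* %1, i64 %a
  %3 = getelementptr inbounds i32, i32* %2, i64 2

the two constant offsets (1 and 2 i32 elements) are folded into a single constant index, so InstCombine emits

  %1 = getelementptr inbounds i32, i32* %p, i64 3
  %2 = getelementptr inbounds i32, i32* %1, i64 %a

and the chain shrinks by one instruction while the variable-index GEP is preserved.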