diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4568,7 +4568,9 @@ Perform the :ref:`getelementptr operation <i_getelementptr>` on constants. As with the :ref:`getelementptr <i_getelementptr>` instruction, the index list may have one or more indexes, which are - required to make sense for the type of "pointer to TY". + required to make sense for the type of "pointer to TY". These indexes + may be implicitly sign-extended or truncated to match the index size + of CSTPTR's address space. ``select (COND, VAL1, VAL2)`` Perform the :ref:`select operation <i_select>` on constants. ``icmp COND (VAL1, VAL2)`` @@ -10325,11 +10327,11 @@ pointee. ``!nontemporal`` does not have any defined semantics for atomic loads. The optional constant ``align`` argument specifies the alignment of the -operation (that is, the alignment of the memory address). It is the +operation (that is, the alignment of the memory address). It is the responsibility of the code emitter to ensure that the alignment information is -correct. Overestimating the alignment results in undefined behavior. +correct. Overestimating the alignment results in undefined behavior. Underestimating the alignment may produce less efficient code. An alignment of -1 is always safe. The maximum possible alignment is ``1 << 32``. An alignment +1 is always safe. The maximum possible alignment is ``1 << 32``. An alignment value higher than the size of the loaded type implies memory up to the alignment value bytes can be safely loaded without trapping in the default address space. Access of the high bytes can interfere with debugging tools, so @@ -10465,11 +10467,11 @@ pointee. ``!nontemporal`` does not have any defined semantics for atomic stores. The optional constant ``align`` argument specifies the alignment of the -operation (that is, the alignment of the memory address). It is the +operation (that is, the alignment of the memory address). It is the responsibility of the code emitter to ensure that the alignment information is -correct. Overestimating the alignment results in undefined behavior. +correct. Overestimating the alignment results in undefined behavior. Underestimating the alignment may produce less efficient code. An alignment of -1 is always safe. The maximum possible alignment is ``1 << 32``. An alignment +1 is always safe. The maximum possible alignment is ``1 << 32``. An alignment value higher than the size of the loaded type implies memory up to the alignment value bytes can be safely loaded without trapping in the default address space. Access of the high bytes can interfere with debugging tools, so @@ -10897,9 +10899,9 @@ If the ``inbounds`` keyword is not present, the offsets are added to the base address with silently-wrapping two's complement arithmetic. If the -offsets have a different width from the pointer, they are sign-extended -or truncated to the width of the pointer. The result value of the -``getelementptr`` may be outside the object pointed to by the base +offsets have a different width from the pointer's index type, they are +sign-extended or truncated to the width of the pointer's index type. The result +value of the ``getelementptr`` may be outside the object pointed to by the base pointer. The result value may not necessarily be used to access memory though, even if it happens to point into allocated storage. 
See the :ref:`Pointer Aliasing Rules <pointeraliasing>` section for more diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -571,6 +571,11 @@ /// are set. unsigned getLargestLegalIntTypeSizeInBits() const; + /// Returns the type of a GEP index in AddressSpace. + /// If it was not specified explicitly, it will be the integer type of the + /// pointer width - IntPtrType. + IntegerType *getIndexType(LLVMContext &C, unsigned AddressSpace) const; + /// Returns the type of a GEP index. /// If it was not specified explicitly, it will be the integer type of the /// pointer width - IntPtrType. diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -567,6 +567,12 @@ return DL.getIntPtrType(Context, AddrSpace); } + /// Fetch the type of an integer that should be used to index GEP operations + /// within AddressSpace. + IntegerType *getIndexTy(const DataLayout &DL, unsigned AddrSpace) { + return DL.getIndexType(Context, AddrSpace); + } + //===--------------------------------------------------------------------===// // Intrinsic creation methods //===--------------------------------------------------------------------===// diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -1099,12 +1099,13 @@ // must be a VLA assert(I.isArrayAllocation()); - // If needed, adjust the alloca's operand size to match the pointer size. - // Subsequent math operations expect the types to match. + // If needed, adjust the alloca's operand size to match the pointer indexing + // size. Subsequent math operations expect the types to match. Value *ArraySize = Builder.CreateZExtOrTrunc( - I.getArraySize(), DL.getIntPtrType(I.getContext())); + I.getArraySize(), + DL.getIndexType(I.getContext(), DL.getAllocaAddrSpace())); assert(ArraySize->getType() == Zero->getType() && - "Expected zero constant to have pointer type"); + "Expected zero constant to have pointer index type"); Value *Size = ConstantInt::get(ArraySize->getType(), DL.getTypeAllocSize(I.getAllocatedType())); diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -885,6 +885,11 @@ return Max != LegalIntWidths.end() ? 
*Max : 0; } +IntegerType *DataLayout::getIndexType(LLVMContext &C, + unsigned AddressSpace) const { + return IntegerType::get(C, getIndexSizeInBits(AddressSpace)); +} + Type *DataLayout::getIndexType(Type *Ty) const { assert(Ty->isPtrOrPtrVectorTy() && "Expected a pointer or pointer vector type."); diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -700,7 +700,7 @@ } if (auto GEP = dyn_cast<GEPOperator>(V)) { - APInt APOffset(DL.getPointerSizeInBits(0), 0); + APInt APOffset(DL.getIndexSizeInBits(0), 0); bool Result = GEP->accumulateConstantOffset(DL, APOffset); if (!Result) return false; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -277,14 +277,14 @@ // order the state machines in complexity of the generated code. Value *Idx = GEP->getOperand(2); - // If the index is larger than the pointer size of the target, truncate the - // index down like the GEP would do implicitly. We don't have to do this for - // an inbounds GEP because the index can't be out of range. + // If the index is larger than the pointer offset size of the target, truncate + // the index down like the GEP would do implicitly. We don't have to do this + // for an inbounds GEP because the index can't be out of range. if (!GEP->isInBounds()) { - Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); - unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); - if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > PtrSize) - Idx = Builder.CreateTrunc(Idx, IntPtrTy); + Type *PtrIdxTy = DL.getIndexType(GEP->getType()); + unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth(); + if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize) + Idx = Builder.CreateTrunc(Idx, PtrIdxTy); } // If inbounds keyword is not present, Idx * ElementSize can overflow. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -219,7 +219,7 @@ // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... // - Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); + Type *IdxTy = IC.getDataLayout().getIndexType(AI.getType()); Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = {NullIdx, NullIdx}; Instruction *GEP = GetElementPtrInst::CreateInBounds( @@ -235,11 +235,12 @@ if (isa<UndefValue>(AI.getArraySize())) return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - // Ensure that the alloca array size argument has type intptr_t, so that - // any casting is exposed early. - Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType()); - if (AI.getArraySize()->getType() != IntPtrTy) { - Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false); + // Ensure that the alloca array size argument has a type whose width matches + // the offset size of the alloca() pointer, which, in the typical case, is + // intptr_t, so that any casting is exposed early. 
+ Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType()); + if (AI.getArraySize()->getType() != PtrIdxTy) { + Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false); return IC.replaceOperand(AI, 0, V); } diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -71,8 +71,8 @@ Value *Offset = SizeOffset.second; ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); - Type *IntTy = DL.getIntPtrType(Ptr->getType()); - Value *NeededSizeVal = IRB.CreateTypeSize(IntTy, NeededSize); + Type *IndexTy = DL.getIndexType(Ptr->getType()); + Value *NeededSizeVal = IRB.CreateTypeSize(IndexTy, NeededSize); auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size)); auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset)); @@ -97,7 +97,7 @@ Value *Or = IRB.CreateOr(Cmp2, Cmp3); if ((!SizeCI || SizeCI->getValue().slt(0)) && !SizeRange.getSignedMin().isNonNegative()) { - Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); + Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0)); Or = IRB.CreateOr(Cmp1, Or); } diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -410,8 +410,8 @@ // Get offset from the base GV. PointerType *GVPtrTy = cast<PointerType>(BaseGV->getType()); - IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace()); - APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true); + IntegerType *OffsetTy = DL->getIndexType(*Ctx, GVPtrTy->getAddressSpace()); + APInt Offset(DL->getTypeSizeInBits(OffsetTy), /*val*/ 0, /*isSigned*/ true); auto *GEPO = cast<GEPOperator>(ConstExpr); // TODO: If we have a mix of inbounds and non-inbounds GEPs, then basing a @@ -432,7 +432,7 @@ // to be cheaper than compute it by <Base + Offset>, which can be lowered to // an ADD instruction or folded into Load/Store instruction. 
InstructionCost Cost = - TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy, + TTI->getIntImmCostInst(Instruction::Add, 1, Offset, OffsetTy, TargetTransformInfo::TCK_SizeAndLatency, Inst); ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV]; ConstCandMapType::iterator Itr; diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -157,7 +157,7 @@ return {}; } - APInt Offset = APInt(DL.getPointerTypeSizeInBits(Addr->getType()), 0); + APInt Offset = APInt(DL.getIndexTypeSizeInBits(Addr->getType()), 0); Value *Base = Addr; auto *GEP = dyn_cast<GEPOperator>(Addr); if (GEP) { diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp --- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp @@ -351,9 +351,9 @@ bool NaryReassociatePass::requiresSignExtension(Value *Index, GetElementPtrInst *GEP) { - unsigned PointerSizeInBits = - DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace()); - return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits; + unsigned IndexSizeInBits = + DL->getIndexSizeInBits(GEP->getType()->getPointerAddressSpace()); + return cast<IntegerType>(Index->getType())->getBitWidth() < IndexSizeInBits; } GetElementPtrInst * @@ -449,12 +449,12 @@ return nullptr; // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0]))); - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); - if (RHS->getType() != IntPtrTy) - RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); + if (RHS->getType() != PtrIdxTy) + RHS = Builder.CreateSExtOrTrunc(RHS, PtrIdxTy); if (IndexedSize != ElementSize) { RHS = Builder.CreateMul( - RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize)); + RHS, ConstantInt::get(PtrIdxTy, IndexedSize / ElementSize)); } GetElementPtrInst *NewGEP = cast<GetElementPtrInst>( Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS)); diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -428,7 +428,7 @@ /// Returns true if the module changes. /// /// Verified in @i32_add in split-gep.ll - bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP); + bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP); /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow. /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting @@ -791,17 +791,17 @@ .getSExtValue(); } -bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize( +bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize( GetElementPtrInst *GEP) { bool Changed = false; - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); gep_type_iterator GTI = gep_type_begin(*GEP); for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; ++I, ++GTI) { // Skip struct member indices which must be i32. 
if (GTI.isSequential()) { - if ((*I)->getType() != IntPtrTy) { - *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP); + if ((*I)->getType() != PtrIdxTy) { + *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom", GEP); Changed = true; } } @@ -849,7 +849,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); - Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); + Type *PtrIndexTy = DL->getIndexType(Variadic->getType()); Type *I8PtrTy = Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace()); @@ -875,15 +875,16 @@ if (CI->isZero()) continue; - APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), + APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(), DL->getTypeAllocSize(GTI.getIndexedType())); // Scale the index by element size. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { Idx = Builder.CreateShl( - Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2())); + Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2())); } else { - Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize)); + Idx = + Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize)); } } // Create an ugly GEP with a single index for each index. @@ -896,7 +897,7 @@ // Create a GEP with the constant offset index. if (AccumulativeByteOffset != 0) { - Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); + Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset); ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); } else @@ -922,6 +923,9 @@ int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); + assert(IntPtrTy == DL->getIndexType(Variadic->getType()) && + "Pointer type must match index type for arithmetic-based lowering of " + "split GEPs"); Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy); gep_type_iterator GTI = gep_type_begin(*Variadic); @@ -973,7 +977,7 @@ if (GEP->hasAllConstantIndices()) return false; - bool Changed = canonicalizeArrayIndicesToIndexSize(GEP); bool NeedsExtraction; int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); @@ -1057,7 +1061,15 @@ if (LowerGEP) { // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to // arithmetic operations if the target uses alias analysis in codegen. - if (TTI.useAA()) + // Additionally, pointers that aren't integral (and so can't be safely + // converted to integers) or those whose offset size is different from their + // pointer size (meaning the pointer carries extra, non-offset data that + // integer arithmetic could corrupt) can't be lowered in this way. + unsigned AddrSpace = GEP->getPointerAddressSpace(); + bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) != + DL->getIndexSizeInBits(AddrSpace); + if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) || + PointerHasExtraData) lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); else lowerToArithmetics(GEP, AccumulativeByteOffset); @@ -1104,13 +1116,13 @@ // used with unsigned integers later. int64_t ElementTypeSizeOfGEP = static_cast<int64_t>( DL->getTypeAllocSize(GEP->getResultElementType())); - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) { // Very likely. 
As long as %gep is naturally aligned, the byte offset we // extracted should be a multiple of sizeof(*%gep). int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP; NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, - ConstantInt::get(IntPtrTy, Index, true), + ConstantInt::get(PtrIdxTy, Index, true), GEP->getName(), GEP); NewGEP->copyMetadata(*GEP); // Inherit the inbounds attribute of the original GEP. @@ -1136,7 +1148,7 @@ NewGEP = cast<Instruction>(Builder.CreateGEP( Builder.getInt8Ty(), Builder.CreateBitCast(NewGEP, I8PtrTy), - {ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true)}, "uglygep", + {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep", GEPWasInBounds)); NewGEP->copyMetadata(*GEP); diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp --- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -484,9 +484,9 @@ // = B + (sext(Idx) * sext(S)) * ElementSize // = B + (sext(Idx) * ElementSize) * sext(S) // Casting to IntegerType is safe because we skipped vector GEPs. - IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType())); + IntegerType *PtrIdxTy = cast<IntegerType>(DL->getIndexType(I->getType())); ConstantInt *ScaledIdx = ConstantInt::get( - IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true); + PtrIdxTy, Idx->getSExtValue() * (int64_t)ElementSize, true); allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); } @@ -549,18 +549,18 @@ Value *ArrayIdx = GEP->getOperand(I); uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); if (ArrayIdx->getType()->getIntegerBitWidth() <= - DL->getPointerSizeInBits(GEP->getAddressSpace())) { - // Skip factoring if ArrayIdx is wider than the pointer size, because - // ArrayIdx is implicitly truncated to the pointer size. + DL->getIndexSizeInBits(GEP->getAddressSpace())) { + // Skip factoring if ArrayIdx is wider than the index size, because + // ArrayIdx is implicitly truncated to the index size. factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP); } // When ArrayIdx is the sext of a value, we try to factor that value as // well. Handling this case is important because array indices are - // typically sign-extended to the pointer size. + // typically sign-extended to the pointer index size. Value *TruncatedArrayIdx = nullptr; if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) && TruncatedArrayIdx->getType()->getIntegerBitWidth() <= - DL->getPointerSizeInBits(GEP->getAddressSpace())) { + DL->getIndexSizeInBits(GEP->getAddressSpace())) { // Skip factoring if TruncatedArrayIdx is wider than the pointer size, // because TruncatedArrayIdx is implicitly truncated to the pointer size. factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP); @@ -675,24 +675,24 @@ } case Candidate::GEP: { - Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); - bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); - if (BumpWithUglyGEP) { - // C = (char *)Basis + Bump - unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); - Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); - Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); - Reduced = - Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds); - Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); - } else { - // C = gep Basis, Bump - // Canonicalize bump to pointer size. 
- Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); - Reduced = Builder.CreateGEP( - cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), - Basis.Ins, Bump, "", InBounds); - } + Type *OffsetTy = DL->getIndexType(C.Ins->getType()); + bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); + if (BumpWithUglyGEP) { + // C = (char *)Basis + Bump + unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); + Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); + Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); + Reduced = + Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds); + Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); + } else { + // C = gep Basis, Bump + // Canonicalize bump to the pointer's index size. + Bump = Builder.CreateSExtOrTrunc(Bump, OffsetTy); + Reduced = Builder.CreateGEP( + cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), Basis.Ins, + Bump, "", InBounds); + } break; } default: diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -748,8 +748,8 @@ // When we have target data, we can reduce the GEP down to the value in bytes // added to the address. const DataLayout &DL = FnL->getParent()->getDataLayout(); - unsigned BitWidth = DL.getPointerSizeInBits(ASL); - APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + unsigned OffsetBitWidth = DL.getIndexSizeInBits(ASL); + APInt OffsetL(OffsetBitWidth, 0), OffsetR(OffsetBitWidth, 0); if (GEPL->accumulateConstantOffset(DL, OffsetL) && GEPR->accumulateConstantOffset(DL, OffsetR)) return cmpAPInts(OffsetL, OffsetR); diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -323,17 +323,17 @@ DL.getTypeStoreSize(PtrBTy->getScalarType())) return false; - unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); - APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy)); + unsigned PtrOffsetWidth = DL.getIndexSizeInBits(ASA); + APInt Size(PtrOffsetWidth, DL.getTypeStoreSize(PtrATy)); return areConsecutivePointers(PtrA, PtrB, Size); } bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta, unsigned Depth) const { - unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType()); - APInt OffsetA(PtrBitWidth, 0); - APInt OffsetB(PtrBitWidth, 0); + unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType()); + APInt OffsetA(OffsetBitWidth, 0); + APInt OffsetB(OffsetBitWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); diff --git a/llvm/test/Instrumentation/BoundsChecking/simple.ll b/llvm/test/Instrumentation/BoundsChecking/simple.ll --- a/llvm/test/Instrumentation/BoundsChecking/simple.ll +++ b/llvm/test/Instrumentation/BoundsChecking/simple.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=bounds-checking -S | FileCheck %s -target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-p1:16:16:16-p2:64:64:64:48-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" 
@.str = private constant [8 x i8] c"abcdefg\00" @.str_as1 = private addrspace(1) constant [8 x i8] c"abcdefg\00" +@.str_as2 = private addrspace(2) constant [8 x i8] c"abcdefg\00" + declare noalias ptr @malloc(i64) nounwind allocsize(0) declare noalias ptr @calloc(i64, i64) nounwind allocsize(0,1) @@ -145,6 +147,28 @@ ret void } +define void @f5_as2(i32 %x) nounwind {; +; CHECK-LABEL: @f5_as2( +; CHECK-NEXT: [[X_C:%.*]] = sext i32 [[X:%.*]] to i48 +; CHECK-NEXT: [[TMP1:%.*]] = add i48 0, [[X_C]] +; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds [8 x i8], ptr addrspace(2) @.str_as2, i32 0, i32 [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i48 8, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i48 8, [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i48 [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TRAP:%.*]], label [[TMP6:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(2) [[IDX]], align 4 +; CHECK-NEXT: ret void +; CHECK: trap: +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] +; CHECK-NEXT: unreachable +; + %idx = getelementptr inbounds [8 x i8], ptr addrspace(2) @.str_as2, i32 0, i32 %x + %1 = load i8, ptr addrspace(2) %idx, align 4 + ret void +} + define void @f6(i64 %x) nounwind { ; CHECK-LABEL: @f6( ; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8 diff --git a/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll b/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll --- a/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll +++ b/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll @@ -4,8 +4,8 @@ define void @test_array_alloca_intptr_not_sizet(i64 %size, ptr %dest) { ; CHECK-LABEL: @test_array_alloca_intptr_not_sizet( -; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[SIZE:%.*]] to i128 -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, i128 [[TMP1]], align 1, addrspace(7) +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[SIZE:%.*]] to i32 +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, i32 [[TMP1]], align 1, addrspace(7) ; CHECK-NEXT: store ptr addrspace(7) [[ALLOCA]], ptr [[DEST:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll --- a/llvm/test/Transforms/InstCombine/load-cmp.ll +++ b/llvm/test/Transforms/InstCombine/load-cmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=instcombine -S -data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s +; RUN: opt -passes=instcombine -S -data-layout="p:32:32:32-p1:16:16:16-p2:128:128:128:32-n8:16:32:64" < %s | FileCheck %s @G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, i16 73, i16 82, i16 69, i16 68, i16 0] @@ -7,6 +7,9 @@ @G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, i16 73, i16 82, i16 69, i16 68, i16 0] +@G16_as2 = internal addrspace(2) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, + i16 73, i16 82, i16 69, i16 68, i16 0] + @GD = internal constant [6 x double] [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0] @@ -68,6 +71,19 @@ } +define i1 @test1_noinbounds_as2(i64 %x) { +; CHECK-LABEL: @test1_noinbounds_as2( +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9 +; CHECK-NEXT: ret i1 [[R]] +; + %p = getelementptr [10 x i16], ptr addrspace(2) @G16_as2, i16 0, i64 %x + %q = load i16, ptr addrspace(2) %p + %r = icmp eq i16 
%q, 0 + ret i1 %r + +} + define i1 @test2(i32 %X) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 4 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll @@ -1,6 +1,6 @@ ; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7" ; CHECK-LABEL: @merge_v2i32_v2i32( ; CHECK: load <4 x i32> @@ -72,6 +72,23 @@ ret void } +; CHECK-LABEL: @merge_fat_ptrs( +; CHECK: load <4 x i16> +; CHECK: store <4 x i16> zeroinitializer +define amdgpu_kernel void @merge_fat_ptrs(ptr addrspace(7) nocapture %a, ptr addrspace(7) nocapture readonly %b) #0 { +entry: + %a.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %a, i32 1 + %b.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %b, i32 1 + + %ld.c = load <2 x i16>, ptr addrspace(7) %b, align 4 + %ld.c.idx.1 = load <2 x i16>, ptr addrspace(7) %b.1, align 4 + + store <2 x i16> zeroinitializer, ptr addrspace(7) %a, align 4 + store <2 x i16> zeroinitializer, ptr addrspace(7) %a.1, align 4 + + ret void +} + ; Ideally this would be merged ; CHECK-LABEL: @merge_load_i32_v2i16( ; CHECK: load i32, diff --git a/llvm/test/Transforms/LowerTypeTests/distinct-index-width-crash.ll b/llvm/test/Transforms/LowerTypeTests/distinct-index-width-crash.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LowerTypeTests/distinct-index-width-crash.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=lowertypetests < %s | FileCheck %s +target datalayout = "e-p:64:64:64:32" + +@a = constant i32 1, !type !0 +@b = constant [2 x i32] [i32 2, i32 3], !type !1 + +!0 = !{i32 0, !"typeid1"} +!1 = !{i32 4, !"typeid1"} + +declare i1 @llvm.type.test(ptr %ptr, metadata %bitset) nounwind readnone + +define i1 @bar() { +; CHECK-LABEL: @bar( +; CHECK-NEXT: ret i1 true +; + %x = call i1 @llvm.type.test(ptr getelementptr ([2 x i32], ptr @b, i32 0, i32 1), metadata !"typeid1") + ret i1 %x +} diff --git a/llvm/test/Transforms/MergeFunc/different-index-width-gep-crash.ll b/llvm/test/Transforms/MergeFunc/different-index-width-gep-crash.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeFunc/different-index-width-gep-crash.ll @@ -0,0 +1,13 @@ +; RUN: opt -passes=mergefunc -disable-output < %s + +target datalayout = "e-p:64:64-p2:128:128:128:32" + +define void @foo(ptr addrspace(2) %x) { + %tmp = getelementptr i32, ptr addrspace(2) %x, i32 1 + ret void +} + +define void @bar(ptr addrspace(2) %x) { + %tmp = getelementptr i32, ptr addrspace(2) %x, i32 1 + ret void +} diff --git a/llvm/test/Transforms/MergeICmps/X86/distinct-index-width-crash.ll b/llvm/test/Transforms/MergeICmps/X86/distinct-index-width-crash.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/MergeICmps/X86/distinct-index-width-crash.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
+; RUN: opt < %s -passes=mergeicmps -verify-dom-info -S | FileCheck %s + +target triple = "x86_64" + +; This is very much not an x86 ABI in current use, but we're testing +; that we've fixed a bug where accumulateConstantOffset() was called incorrectly. +target datalayout = "e-p:64:64:64:32" + +; Define a custom data layout that has index width < pointer width +; and make sure that doesn't break anything +define void @fat_ptrs(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b) { +; CHECK-LABEL: @fat_ptrs( +; CHECK-NEXT: bb0: +; CHECK-NEXT: [[PTR_A1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A:%.*]], i32 0, i32 1 +; CHECK-NEXT: [[PTR_B1:%.*]] = getelementptr inbounds [2 x i64], ptr [[B:%.*]], i32 0, i32 1 +; CHECK-NEXT: br label %"bb1+bb2" +; CHECK: "bb1+bb2": +; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr [[A]], ptr [[B]], i64 16) +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[MEMCMP]], 0 +; CHECK-NEXT: br label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: ret void +; +bb0: + %ptr_a1 = getelementptr inbounds [2 x i64], ptr %a, i32 0, i32 1 + %ptr_b1 = getelementptr inbounds [2 x i64], ptr %b, i32 0, i32 1 + br label %bb1 + +bb1: ; preds = %bb0 + %a0 = load i64, ptr %a + %b0 = load i64, ptr %b + %cond0 = icmp eq i64 %a0, %b0 + br i1 %cond0, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + %a1 = load i64, ptr %ptr_a1 + %b1 = load i64, ptr %ptr_b1 + %cond1 = icmp eq i64 %a1, %b1 + br label %bb3 + +bb3: ; preds = %bb2, %bb1 + %necessary = phi i1 [ %cond1, %bb2 ], [ false, %bb1 ] + ret void +} diff --git a/llvm/test/Transforms/NaryReassociate/nary-gep.ll b/llvm/test/Transforms/NaryReassociate/nary-gep.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/NaryReassociate/nary-gep.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=nary-reassociate -S | FileCheck %s + +target datalayout = "e-p:64:64-p1:32:32:32-p2:128:128:128:32-i64:64-v16:16-v32:32-n32:64" + +; Even though addrspace(2) has 128-bit pointers, no sign extension is needed +; because it has 32-bit offsets. 
+define void @no_sext_fat_pointer(ptr addrspace(2) %a, i32 %i, i32 %j) { +; CHECK-LABEL: @no_sext_fat_pointer( +; CHECK-NEXT: [[V2:%.*]] = getelementptr float, ptr addrspace(2) [[A:%.*]], i32 [[I:%.*]] +; CHECK-NEXT: call void @foo(ptr addrspace(2) [[V2]]) +; CHECK-NEXT: [[V3:%.*]] = getelementptr float, ptr addrspace(2) [[V2]], i32 [[J:%.*]] +; CHECK-NEXT: call void @foo(ptr addrspace(2) [[V3]]) +; CHECK-NEXT: ret void +; + %v1 = add i32 %i, %j + %v2 = getelementptr float, ptr addrspace(2) %a, i32 %i + call void @foo(ptr addrspace(2) %v2) + %v3 = getelementptr float, ptr addrspace(2) %a, i32 %v1 + call void @foo(ptr addrspace(2) %v3) + ret void +} + +declare void @foo(ptr addrspace(2)) diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll @@ -4,8 +4,8 @@ define void @should_be_trunc(ptr addrspace(1) %ptr, i64 %index, ptr %result) { ; CHECK-LABEL: @should_be_trunc( -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i64 [[INDEX:%.*]] to i128 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR:%.*]], i128 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[INDEX:%.*]] to i32 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR:%.*]], i32 [[IDXPROM]] ; CHECK-NEXT: store ptr addrspace(1) [[GEP]], ptr [[RESULT:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll --- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll +++ b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes=slsr,gvn -S | FileCheck %s ; RUN: opt < %s -passes='slsr,gvn' -S | FileCheck %s -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32" +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32-p2:128:128:128:32" ; foo(input[0]); ; foo(input[s]); @@ -183,6 +183,23 @@ ret void } +define void @slsr_gep_fat_pointer(ptr addrspace(2) %input, i32 %s) { + ; p1 = &input[s] + %p1 = getelementptr inbounds i32, ptr addrspace(2) %input, i32 %s + call void @baz2(ptr addrspace(2) %p1) + + ; p2 = &input[s * 2] + %s2 = mul nsw i32 %s, 2 + %p2 = getelementptr inbounds i32, ptr addrspace(2) %input, i32 %s2 +; CHECK: %p2 = getelementptr inbounds i32, ptr addrspace(2) %p1, i32 %s + ; Use index bitwidth, not pointer size (i128) + call void @baz2(ptr addrspace(2) %p2) + + ret void +} + + declare void @foo(ptr) declare void @bar(ptr) declare void @baz(ptr addrspace(1)) +declare void @baz2(ptr addrspace(2))
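
Illustrative note (not part of the patch): the LangRef wording changed above means that a GEP index wider than the index width of its address space is implicitly truncated to that width. A minimal IR sketch under an assumed fat-pointer layout like the one used in the new tests (128-bit pointers, 32-bit indices in addrspace(2)):

  target datalayout = "e-p2:128:128:128:32"

  ; The i64 index is wider than the 32-bit index width of addrspace(2), so it
  ; is implicitly truncated to i32; passes such as SeparateConstOffsetFromGEP
  ; now materialize that truncation explicitly instead of a sext to i128.
  define ptr addrspace(2) @truncating_gep(ptr addrspace(2) %p, i64 %i) {
    %q = getelementptr i32, ptr addrspace(2) %p, i64 %i
    ret ptr addrspace(2) %q
  }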
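Illustrative note (not part of the patch): a minimal sketch of how the new DataLayout::getIndexType(LLVMContext &, unsigned) and IRBuilder::getIndexTy helpers are intended to be used by a transform; the helper name emitByteOffsetGEP and its setup are hypothetical, but the pattern mirrors the transforms updated above.

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Build a byte-offset GEP, canonicalizing the offset to the index width of
  // the pointer's address space (which may be narrower than the pointer
  // itself, e.g. a 32-bit offset inside a 128-bit fat pointer) rather than to
  // getIntPtrType().
  static Value *emitByteOffsetGEP(IRBuilder<> &Builder, const DataLayout &DL,
                                  Value *BasePtr, Value *Offset) {
    unsigned AS = BasePtr->getType()->getPointerAddressSpace();
    Type *IdxTy = Builder.getIndexTy(DL, AS);
    if (Offset->getType() != IdxTy)
      Offset = Builder.CreateSExtOrTrunc(Offset, IdxTy);
    return Builder.CreateGEP(Builder.getInt8Ty(), BasePtr, Offset);
  }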