diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -571,6 +571,11 @@ /// are set. unsigned getLargestLegalIntTypeSizeInBits() const; + /// Returns the type of a GEP index in AddressSpace. + /// If it was not specified explicitly, it will be the integer type of the + /// pointer width - IntPtrType. + IntegerType *getIndexType(LLVMContext &C, unsigned AddressSpace) const; + /// Returns the type of a GEP index. /// If it was not specified explicitly, it will be the integer type of the /// pointer width - IntPtrType. diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -567,6 +567,12 @@ return DL.getIntPtrType(Context, AddrSpace); } + /// Fetch the type of an integer that should be used to index GEP operations + /// within AddressSpace. + IntegerType *getIndexTy(const DataLayout &DL, unsigned AddrSpace) { + return DL.getIndexType(Context, AddrSpace); + } + //===--------------------------------------------------------------------===// // Intrinsic creation methods //===--------------------------------------------------------------------===// diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -1099,12 +1099,13 @@ // must be a VLA assert(I.isArrayAllocation()); - // If needed, adjust the alloca's operand size to match the pointer size. - // Subsequent math operations expect the types to match. + // If needed, adjust the alloca's operand size to match the pointer indexing + // size. Subsequent math operations expect the types to match. Value *ArraySize = Builder.CreateZExtOrTrunc( - I.getArraySize(), DL.getIntPtrType(I.getContext())); + I.getArraySize(), + DL.getIndexType(I.getContext(), DL.getAllocaAddrSpace())); assert(ArraySize->getType() == Zero->getType() && - "Expected zero constant to have pointer type"); + "Expected zero constant to have pointer index type"); Value *Size = ConstantInt::get(ArraySize->getType(), DL.getTypeAllocSize(I.getAllocatedType())); diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -885,6 +885,11 @@ return Max != LegalIntWidths.end() ? *Max : 0; } +IntegerType *DataLayout::getIndexType(LLVMContext &C, + unsigned AddressSpace) const { + return IntegerType::get(C, getIndexSizeInBits(AddressSpace)); +} + Type *DataLayout::getIndexType(Type *Ty) const { assert(Ty->isPtrOrPtrVectorTy() && "Expected a pointer or pointer vector type."); diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -686,7 +686,7 @@ } if (auto GEP = dyn_cast<GEPOperator>(V)) { - APInt APOffset(DL.getPointerSizeInBits(0), 0); + APInt APOffset(DL.getIndexSizeInBits(0), 0); bool Result = GEP->accumulateConstantOffset(DL, APOffset); if (!Result) return false; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -276,14 +276,14 @@ // order the state machines in complexity of the generated code.
Value *Idx = GEP->getOperand(2); - // If the index is larger than the pointer size of the target, truncate the - // index down like the GEP would do implicitly. We don't have to do this for - // an inbounds GEP because the index can't be out of range. + // If the index is larger than the pointer offset size of the target, truncate + // the index down like the GEP would do implicitly. We don't have to do this + // for an inbounds GEP because the index can't be out of range. if (!GEP->isInBounds()) { - Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); - unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); - if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > PtrSize) - Idx = Builder.CreateTrunc(Idx, IntPtrTy); + Type *PtrIdxTy = DL.getIndexType(GEP->getType()); + unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth(); + if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize) + Idx = Builder.CreateTrunc(Idx, PtrIdxTy); } // If inbounds keyword is not present, Idx * ElementSize can overflow. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -219,7 +219,7 @@ // Now that I is pointing to the first non-allocation-inst in the block, // insert our getelementptr instruction... // - Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); + Type *IdxTy = IC.getDataLayout().getIndexType(AI.getType()); Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = {NullIdx, NullIdx}; Instruction *GEP = GetElementPtrInst::CreateInBounds( @@ -235,11 +235,12 @@ if (isa<UndefValue>(AI.getArraySize())) return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); - // Ensure that the alloca array size argument has type intptr_t, so that - // any casting is exposed early. - Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType()); - if (AI.getArraySize()->getType() != IntPtrTy) { - Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false); + // Ensure that the alloca array size argument has type equal to the offset + // size of the alloca() pointer, which, in the typical case, is intptr_t, + // so that any casting is exposed early.
+ Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType()); + if (AI.getArraySize()->getType() != PtrIdxTy) { + Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false); return IC.replaceOperand(AI, 0, V); } diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -71,8 +71,8 @@ Value *Offset = SizeOffset.second; ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size); - Type *IntTy = DL.getIntPtrType(Ptr->getType()); - Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); + Type *IndexTy = DL.getIndexType(Ptr->getType()); + Value *NeededSizeVal = ConstantInt::get(IndexTy, NeededSize); auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size)); auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset)); @@ -97,7 +97,7 @@ Value *Or = IRB.CreateOr(Cmp2, Cmp3); if ((!SizeCI || SizeCI->getValue().slt(0)) && !SizeRange.getSignedMin().isNonNegative()) { - Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0)); + Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0)); Or = IRB.CreateOr(Cmp1, Or); } diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -410,8 +410,8 @@ // Get offset from the base GV. PointerType *GVPtrTy = cast<PointerType>(BaseGV->getType()); - IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace()); - APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true); + IntegerType *OffsetTy = DL->getIndexType(*Ctx, GVPtrTy->getAddressSpace()); + APInt Offset(DL->getTypeSizeInBits(OffsetTy), /*val*/ 0, /*isSigned*/ true); auto *GEPO = cast<GEPOperator>(ConstExpr); // TODO: If we have a mix of inbounds and non-inbounds GEPs, then basing a @@ -432,7 +432,7 @@ // to be cheaper than compute it by <Base + Offset>, which can be lowered to // an ADD instruction or folded into Load/Store instruction.
InstructionCost Cost = - TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy, + TTI->getIntImmCostInst(Instruction::Add, 1, Offset, OffsetTy, TargetTransformInfo::TCK_SizeAndLatency, Inst); ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV]; ConstCandMapType::iterator Itr; diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp --- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp +++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp @@ -157,7 +157,7 @@ return {}; } - APInt Offset = APInt(DL.getPointerTypeSizeInBits(Addr->getType()), 0); + APInt Offset = APInt(DL.getIndexTypeSizeInBits(Addr->getType()), 0); Value *Base = Addr; auto *GEP = dyn_cast<GetElementPtrInst>(Addr); if (GEP) { diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp --- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp @@ -351,9 +351,9 @@ bool NaryReassociatePass::requiresSignExtension(Value *Index, GetElementPtrInst *GEP) { - unsigned PointerSizeInBits = - DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace()); - return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits; + unsigned IndexSizeInBits = + DL->getIndexSizeInBits(GEP->getType()->getPointerAddressSpace()); + return cast<IntegerType>(Index->getType())->getBitWidth() < IndexSizeInBits; } GetElementPtrInst * @@ -449,12 +449,12 @@ return nullptr; // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0]))); - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); - if (RHS->getType() != IntPtrTy) - RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); + if (RHS->getType() != PtrIdxTy) + RHS = Builder.CreateSExtOrTrunc(RHS, PtrIdxTy); if (IndexedSize != ElementSize) { RHS = Builder.CreateMul( - RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize)); + RHS, ConstantInt::get(PtrIdxTy, IndexedSize / ElementSize)); } GetElementPtrInst *NewGEP = cast<GetElementPtrInst>( Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS)); diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -428,7 +428,7 @@ /// Returns true if the module changes. /// /// Verified in @i32_add in split-gep.ll - bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP); + bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP); /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow. /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting @@ -791,17 +791,17 @@ .getSExtValue(); } -bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize( +bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize( GetElementPtrInst *GEP) { bool Changed = false; - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); gep_type_iterator GTI = gep_type_begin(*GEP); for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; ++I, ++GTI) { // Skip struct member indices which must be i32.
if (GTI.isSequential()) { - if ((*I)->getType() != IntPtrTy) { - *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP); + if ((*I)->getType() != PtrIdxTy) { + *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom", GEP); Changed = true; } } @@ -849,7 +849,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); - Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); + Type *PtrIndexTy = DL->getIndexType(Variadic->getType()); Type *I8PtrTy = Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace()); @@ -875,15 +875,16 @@ if (CI->isZero()) continue; - APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), + APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(), DL->getTypeAllocSize(GTI.getIndexedType())); // Scale the index by element size. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { Idx = Builder.CreateShl( - Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2())); + Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2())); } else { - Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize)); + Idx = + Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize)); } } // Create an ugly GEP with a single index for each index. @@ -896,7 +897,7 @@ // Create a GEP with the constant offset index. if (AccumulativeByteOffset != 0) { - Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); + Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset); ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); } else @@ -922,6 +923,9 @@ int64_t AccumulativeByteOffset) { IRBuilder<> Builder(Variadic); Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); + assert(IntPtrTy == DL->getIndexType(Variadic->getType()) && + "Pointer type must match index type for arithmetic-based lowering of " + "split GEPs"); Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy); gep_type_iterator GTI = gep_type_begin(*Variadic); @@ -973,7 +977,7 @@ if (GEP->hasAllConstantIndices()) return false; - bool Changed = canonicalizeArrayIndicesToPointerSize(GEP); + bool Changed = canonicalizeArrayIndicesToIndexSize(GEP); bool NeedsExtraction; int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); @@ -1057,7 +1061,15 @@ if (LowerGEP) { // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to // arithmetic operations if the target uses alias analysis in codegen. - if (TTI.useAA()) + // Additionally, pointers that aren't integral (and so can't be safely + // converted to integers) or those whose offset size is different from their + // pointer size (which means that doing integer arithmetic on them could + // affect that data) can't be lowered in this way. + unsigned AddrSpace = GEP->getPointerAddressSpace(); + bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) != + DL->getIndexSizeInBits(AddrSpace); + if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) || + PointerHasExtraData) lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); else lowerToArithmetics(GEP, AccumulativeByteOffset); @@ -1104,13 +1116,13 @@ // used with unsigned integers later. int64_t ElementTypeSizeOfGEP = static_cast( DL->getTypeAllocSize(GEP->getResultElementType())); - Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + Type *PtrIdxTy = DL->getIndexType(GEP->getType()); if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) { // Very likely. 
As long as %gep is naturally aligned, the byte offset we // extracted should be a multiple of sizeof(*%gep). int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP; NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, - ConstantInt::get(IntPtrTy, Index, true), + ConstantInt::get(PtrIdxTy, Index, true), GEP->getName(), GEP); NewGEP->copyMetadata(*GEP); // Inherit the inbounds attribute of the original GEP. @@ -1136,7 +1148,7 @@ NewGEP = cast<GetElementPtrInst>(Builder.CreateGEP( Builder.getInt8Ty(), Builder.CreateBitCast(NewGEP, I8PtrTy), - {ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true)}, "uglygep", + {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep", GEPWasInBounds)); NewGEP->copyMetadata(*GEP); diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp --- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -484,9 +484,9 @@ // = B + (sext(Idx) * sext(S)) * ElementSize // = B + (sext(Idx) * ElementSize) * sext(S) // Casting to IntegerType is safe because we skipped vector GEPs. - IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType())); + IntegerType *PtrIdxTy = cast<IntegerType>(DL->getIndexType(I->getType())); ConstantInt *ScaledIdx = ConstantInt::get( - IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true); + PtrIdxTy, Idx->getSExtValue() * (int64_t)ElementSize, true); allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); } @@ -549,18 +549,18 @@ Value *ArrayIdx = GEP->getOperand(I); uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); if (ArrayIdx->getType()->getIntegerBitWidth() <= - DL->getPointerSizeInBits(GEP->getAddressSpace())) { - // Skip factoring if ArrayIdx is wider than the pointer size, because - // ArrayIdx is implicitly truncated to the pointer size. + DL->getIndexSizeInBits(GEP->getAddressSpace())) { + // Skip factoring if ArrayIdx is wider than the index size, because + // ArrayIdx is implicitly truncated to the index size. factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP); } // When ArrayIdx is the sext of a value, we try to factor that value as // well. Handling this case is important because array indices are - // typically sign-extended to the pointer size. + // typically sign-extended to the pointer index size. Value *TruncatedArrayIdx = nullptr; if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) && TruncatedArrayIdx->getType()->getIntegerBitWidth() <= - DL->getPointerSizeInBits(GEP->getAddressSpace())) { + DL->getIndexSizeInBits(GEP->getAddressSpace())) { // Skip factoring if TruncatedArrayIdx is wider than the pointer size, // because TruncatedArrayIdx is implicitly truncated to the pointer size. factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP); @@ -675,24 +675,24 @@ } case Candidate::GEP: { - Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType()); - bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); - if (BumpWithUglyGEP) { - // C = (char *)Basis + Bump - unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); - Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); - Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); - Reduced = - Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds); - Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); - } else { - // C = gep Basis, Bump - // Canonicalize bump to pointer size.
- Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy); - Reduced = Builder.CreateGEP( - cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), - Basis.Ins, Bump, "", InBounds); - } + Type *OffsetTy = DL->getIndexType(C.Ins->getType()); + bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); + if (BumpWithUglyGEP) { + // C = (char *)Basis + Bump + unsigned AS = Basis.Ins->getType()->getPointerAddressSpace(); + Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS); + Reduced = Builder.CreateBitCast(Basis.Ins, CharTy); + Reduced = + Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds); + Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType()); + } else { + // C = gep Basis, Bump + // Canonicalize bump to pointer size. + Bump = Builder.CreateSExtOrTrunc(Bump, OffsetTy); + Reduced = Builder.CreateGEP( + cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), Basis.Ins, + Bump, "", InBounds); + } break; } default: diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -715,8 +715,8 @@ // When we have target data, we can reduce the GEP down to the value in bytes // added to the address. const DataLayout &DL = FnL->getParent()->getDataLayout(); - unsigned BitWidth = DL.getPointerSizeInBits(ASL); - APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + unsigned OffsetBitWidth = DL.getIndexSizeInBits(ASL); + APInt OffsetL(OffsetBitWidth, 0), OffsetR(OffsetBitWidth, 0); if (GEPL->accumulateConstantOffset(DL, OffsetL) && GEPR->accumulateConstantOffset(DL, OffsetR)) return cmpAPInts(OffsetL, OffsetR); diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -323,17 +323,17 @@ DL.getTypeStoreSize(PtrBTy->getScalarType())) return false; - unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA); - APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy)); + unsigned PtrOffsetWidth = DL.getIndexSizeInBits(ASA); + APInt Size(PtrOffsetWidth, DL.getTypeStoreSize(PtrATy)); return areConsecutivePointers(PtrA, PtrB, Size); } bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta, unsigned Depth) const { - unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType()); - APInt OffsetA(PtrBitWidth, 0); - APInt OffsetB(PtrBitWidth, 0); + unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType()); + APInt OffsetA(OffsetBitWidth, 0); + APInt OffsetB(OffsetBitWidth, 0); PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); diff --git a/llvm/test/Instrumentation/BoundsChecking/simple.ll b/llvm/test/Instrumentation/BoundsChecking/simple.ll --- a/llvm/test/Instrumentation/BoundsChecking/simple.ll +++ b/llvm/test/Instrumentation/BoundsChecking/simple.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=bounds-checking -S | FileCheck %s -target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target datalayout = "e-p:64:64:64-p1:16:16:16-p2:64:64:64:48-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@.str = private constant [8 x i8] c"abcdefg\00" @.str_as1 = private addrspace(1) constant [8 x i8] c"abcdefg\00" +@.str_as2 = private addrspace(2) constant [8 x i8] c"abcdefg\00" + declare noalias ptr @malloc(i64) nounwind allocsize(0) declare noalias ptr @calloc(i64, i64) nounwind allocsize(0,1) @@ -145,6 +147,28 @@ ret void } +define void @f5_as2(i32 %x) nounwind {; +; CHECK-LABEL: @f5_as2( +; CHECK-NEXT: [[X_C:%.*]] = sext i32 [[X:%.*]] to i48 +; CHECK-NEXT: [[TMP1:%.*]] = add i48 0, [[X_C]] +; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds [8 x i8], ptr addrspace(2) @.str_as2, i32 0, i32 [[X]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i48 8, [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i48 8, [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i48 [[TMP2]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]] +; CHECK-NEXT: br i1 [[TMP5]], label [[TRAP:%.*]], label [[TMP6:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(2) [[IDX]], align 4 +; CHECK-NEXT: ret void +; CHECK: trap: +; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: unreachable +; + %idx = getelementptr inbounds [8 x i8], ptr addrspace(2) @.str_as2, i32 0, i32 %x + %1 = load i8, ptr addrspace(2) %idx, align 4 + ret void +} + define void @f6(i64 %x) nounwind { ; CHECK-LABEL: @f6( ; CHECK-NEXT: [[TMP1:%.*]] = alloca i128, align 8 diff --git a/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll b/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll --- a/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll +++ b/llvm/test/Transforms/InstCombine/alloca-intptr-not-sizet.ll @@ -4,8 +4,8 @@ define void @test_array_alloca_intptr_not_sizet(i64 %size, ptr %dest) { ; CHECK-LABEL: @test_array_alloca_intptr_not_sizet( -; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[SIZE:%.*]] to i128 -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, i128 [[TMP1]], align 1, addrspace(7) +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[SIZE:%.*]] to i32 +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i8, i32 [[TMP1]], align 1, addrspace(7) ; CHECK-NEXT: store ptr addrspace(7) [[ALLOCA]], ptr [[DEST:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll --- a/llvm/test/Transforms/InstCombine/load-cmp.ll +++ b/llvm/test/Transforms/InstCombine/load-cmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=instcombine -S -data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s +; RUN: opt -passes=instcombine -S -data-layout="p:32:32:32-p1:16:16:16-p2:128:128:128:32-n8:16:32:64" < %s | FileCheck %s @G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, i16 73, i16 82, i16 69, i16 68, i16 0] @@ -7,6 +7,9 @@ @G16_as1 = internal addrspace(1) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, i16 73, i16 82, i16 69, i16 68, i16 0] +@G16_as2 = internal addrspace(2) constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85, + i16 73, i16 82, i16 69, i16 68, i16 0] + @GD = internal constant [6 x double] [double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0] @@ -68,6 +71,19 @@ } +define i1 @test1_noinbounds_as2(i64 %x) { +; CHECK-LABEL: @test1_noinbounds_as2( +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 2147483647 +; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 9 +; CHECK-NEXT: ret i1 [[R]] +; + %p = getelementptr [10 x i16], ptr addrspace(2) @G16_as2, i16 0, i64 %x + %q = load i16, ptr addrspace(2) %p + %r = icmp eq i16 
%q, 0 + ret i1 %r + +} + define i1 @test2(i32 %X) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 4 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll @@ -1,6 +1,6 @@ ; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:128:128:128:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" ; CHECK-LABEL: @merge_v2i32_v2i32( ; CHECK: load <4 x i32> @@ -72,6 +72,23 @@ ret void } +; CHECK-LABEL: @merge_fat_ptrs( +; CHECK: load <4 x i16> +; CHECK: store <4 x i16> zeroinitializer +define amdgpu_kernel void @merge_fat_ptrs(ptr addrspace(7) nocapture %a, ptr addrspace(7) nocapture readonly %b) #0 { +entry: + %a.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %a, i32 1 + %b.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %b, i32 1 + + %ld.c = load <2 x i16>, ptr addrspace(7) %b, align 4 + %ld.c.idx.1 = load <2 x i16>, ptr addrspace(7) %b.1, align 4 + + store <2 x i16> zeroinitializer, ptr addrspace(7) %a, align 4 + store <2 x i16> zeroinitializer, ptr addrspace(7) %a.1, align 4 + + ret void +} + ; Ideally this would be merged ; CHECK-LABEL: @merge_load_i32_v2i16( ; CHECK: load i32, diff --git a/llvm/test/Transforms/NaryReassociate/nary-gep.ll b/llvm/test/Transforms/NaryReassociate/nary-gep.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/NaryReassociate/nary-gep.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=nary-reassociate -S | FileCheck %s + +target datalayout = "e-p:64:64-p1:32:32:32-p2:128:128:128:32-i64:64-v16:16-v32:32-n32:64" + +; Even though addrspace(2) has 128-bit pointers, no sign extension needed +; because it has 32-bit offsets.
+define void @no_sext_fat_pointer(ptr addrspace(2) %a, i32 %i, i32 %j) { +; CHECK-LABEL: @no_sext_fat_pointer( +; CHECK-NEXT: [[V2:%.*]] = getelementptr float, ptr addrspace(2) [[A:%.*]], i32 [[I:%.*]] +; CHECK-NEXT: call void @foo(ptr addrspace(2) [[V2]]) +; CHECK-NEXT: [[V3:%.*]] = getelementptr float, ptr addrspace(2) [[V2]], i32 [[J:%.*]] +; CHECK-NEXT: call void @foo(ptr addrspace(2) [[V3]]) +; CHECK-NEXT: ret void +; + %v1 = add i32 %i, %j + %v2 = getelementptr float, ptr addrspace(2) %a, i32 %i + call void @foo(ptr addrspace(2) %v2) + %v3 = getelementptr float, ptr addrspace(2) %a, i32 %v1 + call void @foo(ptr addrspace(2) %v3) + ret void +} + +declare void @foo(ptr addrspace(2)) diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/pointer-type-not-offset-type.ll @@ -4,8 +4,8 @@ define void @should_be_trunc(ptr addrspace(1) %ptr, i64 %index, ptr %result) { ; CHECK-LABEL: @should_be_trunc( -; CHECK-NEXT: [[IDXPROM:%.*]] = sext i64 [[INDEX:%.*]] to i128 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR:%.*]], i128 [[IDXPROM]] +; CHECK-NEXT: [[IDXPROM:%.*]] = trunc i64 [[INDEX:%.*]] to i32 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR:%.*]], i32 [[IDXPROM]] ; CHECK-NEXT: store ptr addrspace(1) [[GEP]], ptr [[RESULT:%.*]], align 16 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll --- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll +++ b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes=slsr,gvn -S | FileCheck %s ; RUN: opt < %s -passes='slsr,gvn' -S | FileCheck %s -target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32" +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32-p2:128:128:128:32" ; foo(input[0]); ; foo(input[s]); @@ -183,6 +183,23 @@ ret void } +define void @slsr_gep_fat_pointer(ptr addrspace(2) %input, i32 %s) { + ; p1 = &input[s] + %p1 = getelementptr inbounds i32, ptr addrspace(2) %input, i32 %s + call void @baz2(ptr addrspace(2) %p1) + + ; p2 = &input[s * 2] + %s2 = mul nsw i32 %s, 2 + %p2 = getelementptr inbounds i32, ptr addrspace(2) %input, i32 %s2 +; CHECK: %p2 = getelementptr inbounds i32, ptr addrspace(2) %p1, i32 %s + ; Use index bitwidth, not pointer size (i128) + call void @baz2(ptr addrspace(2) %p2) + + ret void +} + + declare void @foo(ptr) declare void @bar(ptr) declare void @baz(ptr addrspace(1)) +declare void @baz2(ptr addrspace(2))
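
For reference, a minimal standalone sketch of the behavior this patch relies on: the new DataLayout::getIndexType(LLVMContext &, unsigned) overload returns the index (offset) width of an address space rather than its full pointer width, which is what GEP indices are implicitly truncated or extended to. The file name and datalayout string below are made up for illustration (they mirror the p2:128:128:128:32 strings used in the tests above), and the program assumes a host linked against LLVM's Core and Support libraries.

// index_type_demo.cpp -- hypothetical example, not part of this change.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Address space 2 uses 128-bit "fat" pointers but only 32-bit offsets.
  DataLayout DL("e-p:64:64:64-p2:128:128:128:32");

  // Pointer width and index width differ in address space 2.
  outs() << "AS2 pointer bits: " << DL.getPointerSizeInBits(2) << "\n"; // 128
  outs() << "AS2 index bits:   " << DL.getIndexSizeInBits(2) << "\n";   // 32

  // getIntPtrType yields i128, while the new getIndexType overload yields
  // i32 -- the type the transforms in this patch now use for GEP indices.
  IntegerType *IntPtrTy = DL.getIntPtrType(Ctx, /*AddressSpace=*/2);
  IntegerType *IdxTy = DL.getIndexType(Ctx, /*AddressSpace=*/2);
  outs() << "getIntPtrType: i" << IntPtrTy->getBitWidth() << "\n"; // i128
  outs() << "getIndexType:  i" << IdxTy->getBitWidth() << "\n";    // i32
  return 0;
}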