Index: include/llvm/IR/DataLayout.h
===================================================================
--- include/llvm/IR/DataLayout.h
+++ include/llvm/IR/DataLayout.h
@@ -453,6 +453,12 @@
   /// are set.
   unsigned getLargestLegalIntTypeSizeInBits() const;
 
+  /// \brief Returns the type of a GEP index.
+  /// It is the smaller of IntPtrType and the largest legal integer type;
+  /// we cannot always use IntPtrType for the index, because the target
+  /// may not support full arithmetic at the pointer width.
+  Type *getIndexType(Type *PtrTy) const;
+
   /// \brief Returns the offset from the beginning of the type for the specified
   /// indices.
   ///
Index: lib/Analysis/ConstantFolding.cpp
===================================================================
--- lib/Analysis/ConstantFolding.cpp
+++ lib/Analysis/ConstantFolding.cpp
@@ -730,8 +730,8 @@
 Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
                          Type *ResultTy, Optional<unsigned> InRangeIndex,
                          const DataLayout &DL, const TargetLibraryInfo *TLI) {
-  Type *IntPtrTy = DL.getIntPtrType(ResultTy);
-  Type *IntPtrScalarTy = IntPtrTy->getScalarType();
+  Type *IndexTy = DL.getIndexType(ResultTy);
+  Type *IndexScalarTy = IndexTy->getScalarType();
 
   bool Any = false;
   SmallVector<Constant *, 32> NewIdxs;
@@ -739,11 +739,11 @@
     if ((i == 1 ||
          !isa<StructType>(GetElementPtrInst::getIndexedType(
              SrcElemTy, Ops.slice(1, i - 1)))) &&
-        Ops[i]->getType()->getScalarType() != IntPtrScalarTy) {
+        Ops[i]->getType()->getScalarType() != IndexScalarTy) {
       Any = true;
       Type *NewType = Ops[i]->getType()->isVectorTy()
-                          ? IntPtrTy
-                          : IntPtrTy->getScalarType();
+                          ? IndexTy
+                          : IndexScalarTy;
       NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
                                                                       true,
                                                                       NewType,
@@ -803,33 +803,37 @@
   if (!Ptr->getType()->isPointerTy())
     return nullptr;
 
+  Type *IndexTy = DL.getIndexType(Ptr->getType());
   Type *IntPtrTy = DL.getIntPtrType(Ptr->getType());
 
   // If this is a constant expr gep that is effectively computing an
   // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12'
-  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
-    if (!isa<ConstantInt>(Ops[i])) {
-
-      // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
-      // "inttoptr (sub (ptrtoint Ptr), V)"
-      if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
-        auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
-        assert((!CE || CE->getType() == IntPtrTy) &&
-               "CastGEPIndices didn't canonicalize index types!");
-        if (CE && CE->getOpcode() == Instruction::Sub &&
-            CE->getOperand(0)->isNullValue()) {
-          Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
-          Res = ConstantExpr::getSub(Res, CE->getOperand(1));
-          Res = ConstantExpr::getIntToPtr(Res, ResTy);
-          if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
-            Res = FoldedRes;
-          return Res;
+  // The transformation is legal only if the GEP index and the pointer have the same width.
+  if (IndexTy->getPrimitiveSizeInBits() == IntPtrTy->getPrimitiveSizeInBits()) {
+    for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+      if (!isa<ConstantInt>(Ops[i])) {
+
+        // If this is "gep i8* Ptr, (sub 0, V)", fold this as:
+        // "inttoptr (sub (ptrtoint Ptr), V)"
+        if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
+          auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
+          assert((!CE || CE->getType() == IntPtrTy) &&
+                 "CastGEPIndices didn't canonicalize index types!");
+          if (CE && CE->getOpcode() == Instruction::Sub &&
+              CE->getOperand(0)->isNullValue()) {
+            Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
+            Res = ConstantExpr::getSub(Res, CE->getOperand(1));
+            Res = ConstantExpr::getIntToPtr(Res, ResTy);
+            if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
+              Res = FoldedRes;
+            return Res;
+          }
         }
+        return nullptr;
       }
-      return nullptr;
     }
-
-  unsigned BitWidth = DL.getTypeSizeInBits(IntPtrTy);
+  }
+  unsigned BitWidth = DL.getTypeSizeInBits(IndexTy);
   APInt Offset =
       APInt(BitWidth,
             DL.getIndexedOffsetInType(
@@ -909,7 +913,7 @@
         // The element size is 0. This may be [0 x Ty]*, so just use a zero
         // index for this level and proceed to the next level to see if it can
         // accommodate the offset.
-        NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0));
+        NewIdxs.push_back(ConstantInt::get(IndexTy, 0));
       } else {
         // The element size is non-zero divide the offset by the element
         // size (rounding down), to compute the index at this level.
@@ -918,7 +922,7 @@
         if (Overflow)
           break;
         Offset -= NewIdx * ElemSize;
-        NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx));
+        NewIdxs.push_back(ConstantInt::get(IndexTy, NewIdx));
       }
     } else {
       auto *STy = cast<StructType>(Ty);
Index: lib/IR/DataLayout.cpp
===================================================================
--- lib/IR/DataLayout.cpp
+++ lib/IR/DataLayout.cpp
@@ -726,6 +726,19 @@
   return Max != LegalIntWidths.end() ? *Max : 0;
 }
 
+Type *DataLayout::getIndexType(Type *Ty) const {
+  assert(Ty->isPtrOrPtrVectorTy() &&
+         "Expected a pointer or pointer vector type.");
+  unsigned PointerSize = getPointerTypeSizeInBits(Ty);
+  unsigned LargestIntSize = getLargestLegalIntTypeSizeInBits();
+  unsigned NumBits =
+      LargestIntSize ? std::min(PointerSize, LargestIntSize) : PointerSize;
+  IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits);
+  if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
+    return VectorType::get(IntTy, VecTy->getNumElements());
+  return IntTy;
+}
+
 int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
                                            ArrayRef<Value *> Indices) const {
   int64_t Result = 0;
Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -681,8 +681,7 @@
   // 3. Add the edges for the PHI nodes.
   // 4. Emit GEPs to get the original pointers.
   // 5. Remove the original instructions.
-  Type *IndexType = IntegerType::get(
-      Base->getContext(), DL.getPointerTypeSizeInBits(Start->getType()));
+  Type *IndexType = DL.getIndexType(Start->getType());
 
   DenseMap<Value *, Value *> NewInsts;
   NewInsts[Base] = ConstantInt::getNullValue(IndexType);
@@ -789,8 +788,7 @@
 /// the GEPs Pointer and Index.
 static std::pair<Value *, Value *>
 getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
-  Type *IndexType = IntegerType::get(V->getContext(),
-                                     DL.getPointerTypeSizeInBits(V->getType()));
+  Type *IndexType = DL.getIndexType(V->getType());
 
   Constant *Index = ConstantInt::getNullValue(IndexType);
   while (true) {
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1107,7 +1107,7 @@
   // Start with the index over the outer type.  Note that the type size
   // might be zero (even if the offset isn't zero) if the indexed type
   // is something like [0 x {int, int}]
-  Type *IntPtrTy = DL.getIntPtrType(PtrTy);
+  Type *IndexTy = DL.getIndexType(PtrTy);
   int64_t FirstIdx = 0;
   if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
     FirstIdx = Offset/TySize;
@@ -1122,7 +1122,7 @@
     assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
   }
 
-  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+  NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx));
 
   // Index into the types.  If we fail, set OrigBase to null.
   while (Offset) {
@@ -1144,7 +1144,7 @@
     } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
       uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
       assert(EltSize && "Cannot index into a zero-sized array");
-      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+      NewIndices.push_back(ConstantInt::get(IndexTy,Offset/EltSize));
       Offset %= EltSize;
       Ty = AT->getElementType();
     } else {
@@ -1507,8 +1507,11 @@
   // Eliminate unneeded casts for indices, and replace indices which displace
   // by multiples of a zero size type with zero.
   bool MadeChange = false;
-  Type *IntPtrTy =
-    DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType());
+
+  // The index type may be narrower than the pointer type; the data layout
+  // chooses the right width based on the supported integer types.
+  Type *NewScalarIndexTy =
+      DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
 
   gep_type_iterator GTI = gep_type_begin(GEP);
   for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
@@ -1517,10 +1520,11 @@
     if (GTI.isStruct())
       continue;
 
-    // Index type should have the same width as IntPtr
     Type *IndexTy = (*I)->getType();
-    Type *NewIndexType = IndexTy->isVectorTy() ?
-      VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy;
+    Type *NewIndexType =
+        IndexTy->isVectorTy()
+            ? VectorType::get(NewScalarIndexTy, IndexTy->getVectorNumElements())
+            : NewScalarIndexTy;
 
     // If the element type has zero size then any index over it is equivalent
     // to an index of zero, so replace it with zero if it is not zero already.
@@ -1849,7 +1853,7 @@
     if (SrcElTy->isArrayTy() &&
         DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
            DL.getTypeAllocSize(ResElTy)) {
-      Type *IdxType = DL.getIntPtrType(GEP.getType());
+      Type *IdxType = DL.getIndexType(GEP.getType());
       Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
       Value *NewGEP =
           GEP.isInBounds()
@@ -1876,10 +1880,11 @@
         unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
         uint64_t Scale = SrcSize / ResSize;
 
-        // Earlier transforms ensure that the index has type IntPtrType, which
-        // considerably simplifies the logic by eliminating implicit casts.
-        assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
-               "Index not cast to pointer width?");
+        // Earlier transforms ensure that the index has the right type
+        // according to the data layout, which considerably simplifies the
+        // logic by eliminating implicit casts.
+        assert(Idx->getType() == DL.getIndexType(GEP.getType()) &&
+               "Index type does not match the Data Layout preferences");
 
         bool NSW;
         if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
@@ -1915,19 +1920,19 @@
         unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
         uint64_t Scale = ArrayEltSize / ResSize;
 
-        // Earlier transforms ensure that the index has type IntPtrType, which
-        // considerably simplifies the logic by eliminating implicit casts.
-        assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
-               "Index not cast to pointer width?");
+        // Earlier transforms ensure that the index has the right type
+        // according to the data layout, which considerably simplifies
+        // the logic by eliminating implicit casts.
+        assert(Idx->getType() == DL.getIndexType(GEP.getType()) &&
+               "Index type does not match the Data Layout preferences");
 
         bool NSW;
         if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
           // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
           // If the multiplication NewIdx * Scale may overflow then the new
           // GEP may not be "inbounds".
-          Value *Off[2] = {
-              Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
-              NewIdx};
+          Type *IndTy = DL.getIndexType(GEP.getType());
+          Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
 
           Value *NewGEP = GEP.isInBounds() && NSW
                               ? Builder.CreateInBoundsGEP(
Index: test/Transforms/InstCombine/gep-custom-dl.ll
===================================================================
--- test/Transforms/InstCombine/gep-custom-dl.ll
+++ test/Transforms/InstCombine/gep-custom-dl.ll
@@ -0,0 +1,155 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:m-p:40:64:64-i32:32-i16:16-i8:8-n32"
+
+%struct.B = type { double }
+%struct.A = type { %struct.B, i32, i32 }
+%struct.C = type { [7 x i8] }
+
+
+@Global = constant [10 x i8] c"helloworld"
+
+
+; Test that two array indexing geps fold
+define i32* @test1(i32* %I) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 21
+; CHECK-NEXT:    ret i32* [[B]]
+;
+  %A = getelementptr i32, i32* %I, i8 17
+  %B = getelementptr i32, i32* %A, i16 4
+  ret i32* %B
+}
+
+; Test that two getelementptr insts fold
+define i32* @test2({ i32 }* %I) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[B:%.*]] = getelementptr { i32 }, { i32 }* [[I:%.*]], i32 1, i32 0
+; CHECK-NEXT:    ret i32* [[B]]
+;
+  %A = getelementptr { i32 }, { i32 }* %I, i32 1
+  %B = getelementptr { i32 }, { i32 }* %A, i32 0, i32 0
+  ret i32* %B
+}
+
+define void @test3(i8 %B) {
+; This should be turned into a constexpr instead of being an instruction
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    store i8 [[B:%.*]], i8* getelementptr inbounds ([10 x i8], [10 x i8]* @Global, i32 0, i32 4), align 1
+; CHECK-NEXT:    ret void
+;
+  %A = getelementptr [10 x i8], [10 x i8]* @Global, i32 0, i32 4
+  store i8 %B, i8* %A
+  ret void
+}
+
+%as1_ptr_struct = type { i32 addrspace(1)* }
+%as2_ptr_struct = type { i32 addrspace(2)* }
+
+@global_as2 = addrspace(2) global i32 zeroinitializer
+@global_as1_as2_ptr = addrspace(1) global %as2_ptr_struct { i32 addrspace(2)* @global_as2 }
+
+; This should be turned into a constexpr instead of being an instruction
+define void @test_evaluate_gep_nested_as_ptrs(i32 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_nested_as_ptrs(
+; CHECK-NEXT:    store i32 addrspace(2)* [[B:%.*]], i32 addrspace(2)* addrspace(1)* getelementptr inbounds (%as2_ptr_struct, [[AS2_PTR_STRUCT:%.*]] addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0), align 8
+; CHECK-NEXT:    ret void
+;
+  %A = getelementptr %as2_ptr_struct, %as2_ptr_struct addrspace(1)* @global_as1_as2_ptr, i32 0, i32 0
+  store i32 addrspace(2)* %B, i32 addrspace(2)* addrspace(1)* %A
+  ret void
+}
+
+@arst = addrspace(1) global [4 x i8 addrspace(2)*] zeroinitializer
+
+define void @test_evaluate_gep_as_ptrs_array(i8 addrspace(2)* %B) {
+; CHECK-LABEL: @test_evaluate_gep_as_ptrs_array(
+; CHECK-NEXT:    store i8 addrspace(2)* [[B:%.*]], i8 addrspace(2)* addrspace(1)* getelementptr inbounds ([4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i32 0, i32 2), align 16
+; CHECK-NEXT:    ret void
+;
+
+  %A = getelementptr [4 x i8 addrspace(2)*], [4 x i8 addrspace(2)*] addrspace(1)* @arst, i16 0, i16 2
+  store i8 addrspace(2)* %B, i8 addrspace(2)* addrspace(1)* %A
+  ret void
+}
+
+define i32* @test4(i32* %I, i32 %C, i32 %D) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[A:%.*]] = getelementptr i32, i32* [[I:%.*]], i32 [[C:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = getelementptr i32, i32* [[A]], i32 [[D:%.*]]
+; CHECK-NEXT:    ret i32* [[B]]
+;
+  %A = getelementptr i32, i32* %I, i32 %C
+  %B = getelementptr i32, i32* %A, i32 %D
+  ret i32* %B
+}
+
+
+define i1 @test5({ i32, i32 }* %x, { i32, i32 }* %y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[TMP_4:%.*]] = icmp eq { i32, i32 }* [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP_4]]
+;
+  %tmp.1 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
+  %tmp.3 = getelementptr { i32, i32 }, { i32, i32 }* %y, i32 0, i32 1
+  ;; seteq x, y
+  %tmp.4 = icmp eq i32* %tmp.1, %tmp.3
+  ret i1 %tmp.4
+}
+
+%S = type { i32, [ 100 x i32] }
+
+define <2 x i1> @test6(<2 x i32> %X, <2 x %S*> %P) nounwind {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %A = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 1>, <2 x i32> %X
+  %B = getelementptr inbounds %S, <2 x %S*> %P, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 0, i32 0>
+  %C = icmp eq <2 x i32*> %A, %B
+  ret <2 x i1> %C
+}
+
+@G = external global [3 x i8]
+define i8* @test7(i16 %Idx) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[ZE_IDX:%.*]] = zext i16 [[IDX:%.*]] to i32
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [3 x i8], [3 x i8]* @G, i32 0, i32 [[ZE_IDX]]
+; CHECK-NEXT:    ret i8* [[TMP]]
+;
+  %ZE_Idx = zext i16 %Idx to i32
+  %tmp = getelementptr i8, i8* getelementptr ([3 x i8], [3 x i8]* @G, i32 0, i32 0), i32 %ZE_Idx
+  ret i8* %tmp
+}
+
+
+; Test folding of constantexpr geps into normal geps.
+@Array = external global [40 x i32]
+define i32 *@test8(i32 %X) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[A:%.*]] = getelementptr [40 x i32], [40 x i32]* @Array, i32 0, i32 [[X:%.*]]
+; CHECK-NEXT:    ret i32* [[A]]
+;
+  %A = getelementptr i32, i32* getelementptr ([40 x i32], [40 x i32]* @Array, i32 0, i32 0), i32 %X
+  ret i32* %A
+}
+
+define i32 *@test9(i32 *%base, i8 %ind) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[IND:%.*]] to i32
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[TMP1]]
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %res = getelementptr i32, i32 *%base, i8 %ind
+  ret i32* %res
+}
+
+define i32 @test10() {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    ret i32 8
+;
+  %A = getelementptr { i32, double }, { i32, double }* null, i32 0, i32 1
+  %B = ptrtoint double* %A to i32
+  ret i32 %B
+}
Index: test/Transforms/InstCombine/memcmp-constant-fold.ll
===================================================================
--- test/Transforms/InstCombine/memcmp-constant-fold.ll
+++ test/Transforms/InstCombine/memcmp-constant-fold.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S -data-layout=e-n32 | FileCheck %s --check-prefix=ALL --check-prefix=LE
 ; RUN: opt < %s -instcombine -S -data-layout=E-n32 | FileCheck %s --check-prefix=ALL --check-prefix=BE
 
@@ -54,7 +55,7 @@
 
 define i1 @memcmp_3bytes_aligned_constant_i32(i8* align 4 %x) {
 ; ALL-LABEL: @memcmp_3bytes_aligned_constant_i32(
-; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* bitcast (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @intbuf, i64 0, i64 1) to i8*), i8* bitcast ([2 x i32]* @intbuf to i8*), i64 3)
+; ALL-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* bitcast (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @intbuf, i32 0, i32 1) to i8*), i8* bitcast ([2 x i32]* @intbuf to i8*), i64 3)
 ; ALL-NEXT:    [[CMPEQ0:%.*]] = icmp eq i32 [[CALL]], 0
 ; ALL-NEXT:    ret i1 [[CMPEQ0]]
 ;
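For reference, a minimal standalone sketch (not part of the patch) of what the new DataLayout::getIndexType returns for the custom data layout exercised by gep-custom-dl.ll: pointers are 40 bits wide, but the largest legal integer type is i32 (the "n32" component), so GEP indices are canonicalized to i32 rather than to the pointer-width i40. The main() driver and header set below are illustrative only; the sketch assumes a build that links against LLVM with this change applied.

// getindextype_sketch.cpp (hypothetical file name, for illustration only)
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Same data layout string as the new test: 40-bit pointers, n32.
  DataLayout DL("e-m:m-p:40:64:64-i32:32-i16:16-i8:8-n32");

  Type *PtrTy = Type::getInt8PtrTy(Ctx);    // i8* in address space 0
  Type *IdxTy = DL.getIndexType(PtrTy);     // API added by this patch
  Type *IntPtrTy = DL.getIntPtrType(PtrTy); // pointer-width integer

  // Expected: the index type is i32, i.e. min(pointer width 40, largest
  // legal integer width 32), while the IntPtr type remains i40.
  IdxTy->print(outs());
  outs() << "\n";
  IntPtrTy->print(outs());
  outs() << "\n";
  return 0;
}

This is also why the new gep-custom-dl.ll checks expect indices like "i32 21" and why memcmp-constant-fold.ll under -data-layout=e-n32 now folds the constant GEP with i32 rather than i64 indices.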