Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -1981,12 +1981,15 @@
 ``A<address space>``
     Specifies the address space of objects created by '``alloca``'.
     Defaults to the default address space of 0.
-``p[n]:<size>:<abi>:<pref>:<idx>``
+``p[n]:<size>:<abi>[:<pref>[:<idx>[:<global>]]]``
     This specifies the *size* of a pointer and its ``<abi>`` and
     ``<pref>``\erred alignments for address space ``n``. The fourth parameter
     ``<idx>`` is a size of index that used for address calculation. If not
     specified, the default index size is equal to the pointer size. All sizes
-    are in bits. The address space, ``n``, is optional, and if not specified,
+    are in bits. ``<global>`` is the number of bits in addresses of global
+    values. The high bits are assumed to be 0. If not specified, this is equal
+    to the pointer size.
+    The address space, ``n``, is optional, and if not specified,
     denotes the default address space 0.  The value of ``n`` must be
     in the range [1,2^23).
 ``i<size>:<abi>:<pref>``

Index: include/llvm/IR/DataLayout.h
===================================================================
--- include/llvm/IR/DataLayout.h
+++ include/llvm/IR/DataLayout.h
@@ -93,11 +93,12 @@
   uint32_t TypeByteWidth;
   uint32_t AddressSpace;
   uint32_t IndexWidth;
+  uint32_t GlobalBitWidth;

   /// Initializer
   static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign,
                               unsigned PrefAlign, uint32_t TypeByteWidth,
-                              uint32_t IndexWidth);
+                              uint32_t IndexWidth, uint32_t GlobalBitWidth);

   bool operator==(const PointerAlignElem &rhs) const;
 };
@@ -169,7 +170,7 @@
                             bool ABIAlign, Type *Ty) const;
   void setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
                            unsigned PrefAlign, uint32_t TypeByteWidth,
-                           uint32_t IndexWidth);
+                           uint32_t IndexWidth, uint32_t GlobalBitWidth);

   /// Internal helper method that returns requested alignment for type.
   unsigned getAlignment(Type *Ty, bool abi_or_pref) const;
@@ -334,6 +335,10 @@
   /// the backends/clients are updated.
   unsigned getPointerSize(unsigned AS = 0) const;

+  /// Return bit width of global value addresses. Higher bits can be assumed
+  /// to be 0.
+  unsigned getPointerGlobalBitWidth(unsigned AS) const;
+
   // Index size used for address calculation.
   unsigned getIndexSize(unsigned AS) const;

Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -1293,7 +1293,9 @@
     KnownBits LocalKnown(BitWidth);
     computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
     unsigned TrailZ = LocalKnown.countMinTrailingZeros();
+    unsigned LeadZ = LocalKnown.countMinLeadingZeros();

+    uint64_t StructOffset = 0;
     gep_type_iterator GTI = gep_type_begin(I);
     for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
       Value *Index = I->getOperand(i);
@@ -1310,14 +1312,13 @@
         unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
         const StructLayout *SL = Q.DL.getStructLayout(STy);
-        uint64_t Offset = SL->getElementOffset(Idx);
-        TrailZ = std::min(TrailZ,
-                          countTrailingZeros(Offset));
+        StructOffset += SL->getElementOffset(Idx);
       } else {
         // Handle array index arithmetic.
         Type *IndexedTy = GTI.getIndexedType();
         if (!IndexedTy->isSized()) {
           TrailZ = 0;
+          LeadZ = 0;
           break;
         }
         unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
@@ -1327,10 +1328,33 @@
         TrailZ = std::min(TrailZ,
                           unsigned(countTrailingZeros(TypeSize) +
                                    LocalKnown.countMinTrailingZeros()));
+        if (!LocalKnown.isNonNegative()) {
+          LeadZ = 0;
+        } else if (!LocalKnown.isZero()) {
+          // Bit width for (Index * TypeSize).
+          //
+          // In the common case where TypeSize is a power-of-two this is just
+          // the bit width of Index + log2(TypeSize). Else it's bounded by the
+          // bit width of TypeSize + bit width of Index.
+          unsigned OffsetWidth = std::min(
+              BitWidth, GEPOpiBits - LocalKnown.countMinLeadingZeros() +
+                            Log2_64_Ceil(TypeSize));
+          LeadZ = std::min(LeadZ, BitWidth - OffsetWidth);
+          if (LeadZ > 0)
+            LeadZ--;
+        }
       }
     }

+    if (StructOffset) {
+      TrailZ = std::min(TrailZ, countTrailingZeros(StructOffset));
+      LeadZ = std::min(LeadZ, BitWidth - (Log2_64(StructOffset) + 1));
+      if (LeadZ > 0)
+        LeadZ--;
+    }
+
     Known.Zero.setLowBits(TrailZ);
+    Known.Zero.setHighBits(LeadZ);
     break;
   }
   case Instruction::PHI: {
@@ -1659,6 +1683,13 @@
     unsigned Align = V->getPointerAlignment(Q.DL);
     if (Align)
       Known.Zero.setLowBits(countTrailingZeros(Align));
+
+    if (isa<GlobalValue>(V)) {
+      unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+      unsigned Bits = Q.DL.getPointerGlobalBitWidth(AS);
+      assert(Bits <= BitWidth);
+      Known.Zero.setBits(Bits, BitWidth);
+    }
   }

   // computeKnownBitsFromAssume strictly refines Known.

Index: lib/IR/DataLayout.cpp
===================================================================
--- lib/IR/DataLayout.cpp
+++ lib/IR/DataLayout.cpp
@@ -130,7 +130,7 @@
 PointerAlignElem
 PointerAlignElem::get(uint32_t AddressSpace, unsigned ABIAlign,
                       unsigned PrefAlign, uint32_t TypeByteWidth,
-                      uint32_t IndexWidth) {
+                      uint32_t IndexWidth, uint32_t GlobalBitWidth) {
   assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
   PointerAlignElem retval;
   retval.AddressSpace = AddressSpace;
@@ -138,6 +138,7 @@
   retval.PrefAlign = PrefAlign;
   retval.TypeByteWidth = TypeByteWidth;
   retval.IndexWidth = IndexWidth;
+  retval.GlobalBitWidth = GlobalBitWidth;
   return retval;
 }

@@ -147,7 +148,8 @@
           && AddressSpace == rhs.AddressSpace
           && PrefAlign == rhs.PrefAlign
           && TypeByteWidth == rhs.TypeByteWidth
-          && IndexWidth == rhs.IndexWidth);
+          && IndexWidth == rhs.IndexWidth
+          && GlobalBitWidth == rhs.GlobalBitWidth);
 }

 //===----------------------------------------------------------------------===//
@@ -193,7 +195,7 @@
     setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign,
                  E.TypeBitWidth);
   }
-  setPointerAlignment(0, 8, 8, 8, 8);
+  setPointerAlignment(0, 8, 8, 8, 8, 64);

   parseSpecifier(Desc);
 }
@@ -304,6 +306,10 @@

       // Preferred alignment.
       unsigned PointerPrefAlign = PointerABIAlign;
+
+      // Size of global value pointers. This default assumes 8-bit bytes.
+      unsigned GlobalBitWidth = 8 * PointerMemSize;
+
       if (!Rest.empty()) {
         Split = split(Rest, ':');
         PointerPrefAlign = inBytes(getInt(Tok));
@@ -317,10 +323,17 @@
           IndexSize = inBytes(getInt(Tok));
           if (!IndexSize)
             report_fatal_error("Invalid index size of 0 bytes");
+
+          if (!Rest.empty()) {
+            Split = split(Rest, ':');
+            GlobalBitWidth = getInt(Tok);
+            if (!GlobalBitWidth)
+              report_fatal_error("Invalid global bit width of 0 bits");
+          }
         }
       }
       setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign,
-                          PointerMemSize, IndexSize);
+                          PointerMemSize, IndexSize, GlobalBitWidth);
       break;
     }
     case 'i':
@@ -493,7 +506,8 @@

 void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
                                      unsigned PrefAlign, uint32_t TypeByteWidth,
-                                     uint32_t IndexWidth) {
+                                     uint32_t IndexWidth,
+                                     uint32_t GlobalBitWidth) {
   if (PrefAlign < ABIAlign)
     report_fatal_error(
         "Preferred alignment cannot be less than the ABI alignment");
@@ -501,12 +515,14 @@
   PointersTy::iterator I = findPointerLowerBound(AddrSpace);
   if (I == Pointers.end() || I->AddressSpace != AddrSpace) {
     Pointers.insert(I, PointerAlignElem::get(AddrSpace, ABIAlign, PrefAlign,
-                                             TypeByteWidth, IndexWidth));
+                                             TypeByteWidth, IndexWidth,
+                                             GlobalBitWidth));
   } else {
     I->ABIAlign = ABIAlign;
     I->PrefAlign = PrefAlign;
     I->TypeByteWidth = TypeByteWidth;
     I->IndexWidth = IndexWidth;
+    I->GlobalBitWidth = GlobalBitWidth;
   }
 }
@@ -635,6 +651,15 @@
   return I->TypeByteWidth;
 }

+unsigned DataLayout::getPointerGlobalBitWidth(unsigned AS) const {
+  PointersTy::const_iterator I = findPointerLowerBound(AS);
+  if (I == Pointers.end() || I->AddressSpace != AS) {
+    I = findPointerLowerBound(0);
+    assert(I->AddressSpace == 0);
+  }
+  return I->GlobalBitWidth;
+}
+
 unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
   assert(Ty->isPtrOrPtrVectorTy() &&
          "This should only be called with a pointer or pointer vector type");

Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -271,7 +271,8 @@

   // 32-bit private, local, and region pointers. 64-bit global, constant and
   // flat.
-  return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
+  return "e-p:64:64-p1:64:64-p2:32:32:32:32:16-p3:32:32:32:32:16"
+         "-p4:64:64-p5:32:32-p6:32:32"
          "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
          "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
 }

Index: test/Transforms/LoadStoreVectorizer/AMDGPU/ds-bounds.ll
===================================================================
--- test/Transforms/LoadStoreVectorizer/AMDGPU/ds-bounds.ll
+++ test/Transforms/LoadStoreVectorizer/AMDGPU/ds-bounds.ll
@@ -18,12 +18,7 @@

 ; ALL-LABEL: @store_global_const_idx(
-;
-; TODO: Addresses are known-positive, this could be merged!
-; SI: store i32
-; SI: store i32
-;
-; NONSI: store <2 x i32> , <2 x i32> addrspace(3)* %0, align 4
+; ALL: store <2 x i32> , <2 x i32> addrspace(3)* %0, align 4
 define amdgpu_cs void @store_global_const_idx() #0 {
 entry:
   %ptr.a = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 3
@@ -51,12 +46,7 @@

 ; ALL-LABEL: @store_global_var_idx_case2(
-;
-; TODO: Addresses are known-positive, this could be merged!
-; SI: store i32
-; SI: store i32
-;
-; NONSI: store <2 x i32> , <2 x i32> addrspace(3)* %0, align 4
+; ALL: store <2 x i32> , <2 x i32> addrspace(3)* %0, align 4
 define amdgpu_cs void @store_global_var_idx_case2(i32 %idx) #0 {
 entry:
   %idx.and = and i32 %idx, 255
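
Below are a few illustrative sketches of how the pieces above fit together. They assume this patch is applied; the address-space numbers, widths, and names in them are made up for the example rather than taken from the patch or its tests.

First, the extended ``p`` specifier from the LangRef change together with the new DataLayout::getPointerGlobalBitWidth() accessor:

  // Sketch only: exercises the proposed sixth pointer parameter and the new
  // accessor added by this patch. The layout string is illustrative.
  #include "llvm/IR/DataLayout.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    // AS 3: 32-bit pointers, 32-bit ABI/preferred alignment, 32-bit index
    // width, and only 16 address bits used by global values.
    DataLayout DL("e-p:64:64-p3:32:32:32:32:16-i64:64-n32:64");

    // AS 0 gives no <global> entry, so it defaults to the pointer size.
    assert(DL.getPointerGlobalBitWidth(0) == 64);
    // AS 3 uses the explicit 16-bit global width from the string.
    assert(DL.getPointerGlobalBitWidth(3) == 16);
    return 0;
  }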
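Second, a sketch of what the ValueTracking change is meant to expose: once an address space advertises a narrow global width, the high bits of a global value's address become known zero, which is what lets the LoadStoreVectorizer merge the LDS stores in the updated test. The IR string and the global's name here are invented for the example.

  // Sketch only (assumes this patch): the upper bits of an addrspace(3)
  // global's address are known zero once the datalayout declares a 16-bit
  // global width for that address space.
  #include "llvm/AsmParser/Parser.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/Support/KnownBits.h"
  #include "llvm/Support/SourceMgr.h"
  #include <cassert>
  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    SMDiagnostic Err;
    std::unique_ptr<Module> M = parseAssemblyString(
        "target datalayout = \"e-p:64:64-p3:32:32:32:32:16\"\n"
        "@lds = external addrspace(3) global [512 x i32]\n",
        Err, Ctx);
    assert(M && "invalid IR");

    // @lds is a 32-bit pointer in AS 3; with a 16-bit global width the patch
    // sets bits [16, 32) of Known.Zero in computeKnownBits.
    KnownBits Known =
        computeKnownBits(M->getNamedGlobal("lds"), M->getDataLayout());
    assert(Known.countMinLeadingZeros() >= 16);
    return 0;
  }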
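Finally, a worked instance of the leading-zero bound in the GEP case, with concrete numbers. The trailing decrement mirrors the patch's LeadZ-- step, which I read as reserving one bit for the carry when the offset is added to the base address.

  // Sketch only: the same arithmetic as the patch's GEP case, with made-up
  // values. An i32 index known to be < 256 steps over i32 array elements.
  #include "llvm/Support/MathExtras.h"
  #include <algorithm>
  #include <cstdint>

  int main() {
    unsigned BitWidth = 32;       // pointer width of the address space
    unsigned GEPOpiBits = 32;     // width of the index operand
    unsigned IndexLeadZeros = 24; // index < 256 => 24 known leading zeros
    uint64_t TypeSize = 4;        // allocation size of i32

    // Width of Index * TypeSize: width(Index) + ceil(log2(TypeSize)).
    unsigned OffsetWidth = std::min(
        BitWidth, GEPOpiBits - IndexLeadZeros + llvm::Log2_64_Ceil(TypeSize));
    // OffsetWidth = min(32, 8 + 2) = 10, so the offset uses at most 10 bits.
    unsigned LeadZ = BitWidth - OffsetWidth; // 22
    if (LeadZ > 0)
      --LeadZ; // allow for the carry out of base + offset: 21
    return LeadZ == 21 ? 0 : 1;
  }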