Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -1981,12 +1981,15 @@
``A<address space>``
Specifies the address space of objects created by '``alloca``'.
Defaults to the default address space of 0.
-``p[n]:<size>:<abi>:<pref>:<idx>``
+``p[n]:<size>:<abi>[:<pref>[:<idx>[:<global>]]]``
    This specifies the *size* of a pointer and its ``<abi>`` and
    ``<pref>``\erred alignments for address space ``n``. The fourth parameter
    ``<idx>`` is a size of index that used for address calculation. If not
specified, the default index size is equal to the pointer size. All sizes
- are in bits. The address space, ``n``, is optional, and if not specified,
+    are in bits. ``<global>`` is the number of bits in addresses of global
+    values. The high bits are assumed to be 0. If not specified, this is equal
+    to pointer size.
+ The address space, ``n``, is optional, and if not specified,
denotes the default address space 0. The value of ``n`` must be
in the range [1,2^23).
``i<size>:<abi>:<pref>``
Index: include/llvm/IR/DataLayout.h
===================================================================
--- include/llvm/IR/DataLayout.h
+++ include/llvm/IR/DataLayout.h
@@ -93,11 +93,12 @@
uint32_t TypeByteWidth;
uint32_t AddressSpace;
uint32_t IndexWidth;
+ uint32_t GlobalBitWidth;
/// Initializer
static PointerAlignElem get(uint32_t AddressSpace, unsigned ABIAlign,
unsigned PrefAlign, uint32_t TypeByteWidth,
- uint32_t IndexWidth);
+ uint32_t IndexWidth, uint32_t GlobalBitWidth);
bool operator==(const PointerAlignElem &rhs) const;
};
@@ -169,7 +170,7 @@
bool ABIAlign, Type *Ty) const;
void setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
unsigned PrefAlign, uint32_t TypeByteWidth,
- uint32_t IndexWidth);
+ uint32_t IndexWidth, uint32_t GlobalBitWidth);
/// Internal helper method that returns requested alignment for type.
unsigned getAlignment(Type *Ty, bool abi_or_pref) const;
@@ -334,6 +335,10 @@
/// the backends/clients are updated.
unsigned getPointerSize(unsigned AS = 0) const;
+ /// Return bit width of global value addresses. Higher bits can be assumed
+ /// to be 0.
+ unsigned getPointerGlobalBitWidth(unsigned AS) const;
+
// Index size used for address calculation.
unsigned getIndexSize(unsigned AS) const;
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -1293,7 +1293,9 @@
KnownBits LocalKnown(BitWidth);
computeKnownBits(I->getOperand(0), LocalKnown, Depth + 1, Q);
unsigned TrailZ = LocalKnown.countMinTrailingZeros();
+ unsigned LeadZ = LocalKnown.countMinLeadingZeros();
+ uint64_t StructOffset = 0;
gep_type_iterator GTI = gep_type_begin(I);
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
Value *Index = I->getOperand(i);
@@ -1310,14 +1312,13 @@
        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
const StructLayout *SL = Q.DL.getStructLayout(STy);
- uint64_t Offset = SL->getElementOffset(Idx);
- TrailZ = std::min(TrailZ,
- countTrailingZeros(Offset));
+ StructOffset += SL->getElementOffset(Idx);
} else {
// Handle array index arithmetic.
Type *IndexedTy = GTI.getIndexedType();
if (!IndexedTy->isSized()) {
TrailZ = 0;
+ LeadZ = 0;
break;
}
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
@@ -1327,10 +1328,33 @@
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
LocalKnown.countMinTrailingZeros()));
+ if (!LocalKnown.isNonNegative()) {
+ LeadZ = 0;
+ } else if (!LocalKnown.isZero()) {
+ // Bit width for (Index * TypeSize).
+ //
+ // In the common case where TypeSize is a power-of-two this is just
+ // the bit width of Index + log2(TypeSize). Else it's bounded by the
+ // bit width of TypeSize + bit width of Index.
+ unsigned OffsetWidth = std::min(
+ BitWidth, GEPOpiBits - LocalKnown.countMinLeadingZeros() +
+ Log2_64_Ceil(TypeSize));
+ LeadZ = std::min(LeadZ, BitWidth - OffsetWidth);
+ if (LeadZ > 0)
+ LeadZ--;
+ }
}
}
+ if (StructOffset) {
+ TrailZ = std::min(TrailZ, countTrailingZeros(StructOffset));
+ LeadZ = std::min(LeadZ, BitWidth - (Log2_64(StructOffset) + 1));
+ if (LeadZ > 0)
+ LeadZ--;
+ }
+
Known.Zero.setLowBits(TrailZ);
+ Known.Zero.setHighBits(LeadZ);
break;
}
case Instruction::PHI: {
@@ -1659,6 +1683,13 @@
unsigned Align = V->getPointerAlignment(Q.DL);
if (Align)
Known.Zero.setLowBits(countTrailingZeros(Align));
+
+    if (isa<GlobalValue>(V)) {
+      unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+ unsigned Bits = Q.DL.getPointerGlobalBitWidth(AS);
+ assert(Bits <= BitWidth);
+ Known.Zero.setBits(Bits, BitWidth);
+ }
}
// computeKnownBitsFromAssume strictly refines Known.
Index: lib/IR/DataLayout.cpp
===================================================================
--- lib/IR/DataLayout.cpp
+++ lib/IR/DataLayout.cpp
@@ -130,7 +130,7 @@
PointerAlignElem
PointerAlignElem::get(uint32_t AddressSpace, unsigned ABIAlign,
unsigned PrefAlign, uint32_t TypeByteWidth,
- uint32_t IndexWidth) {
+ uint32_t IndexWidth, uint32_t GlobalBitWidth) {
assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
PointerAlignElem retval;
retval.AddressSpace = AddressSpace;
@@ -138,6 +138,7 @@
retval.PrefAlign = PrefAlign;
retval.TypeByteWidth = TypeByteWidth;
retval.IndexWidth = IndexWidth;
+ retval.GlobalBitWidth = GlobalBitWidth;
return retval;
}
@@ -147,7 +148,8 @@
&& AddressSpace == rhs.AddressSpace
&& PrefAlign == rhs.PrefAlign
&& TypeByteWidth == rhs.TypeByteWidth
- && IndexWidth == rhs.IndexWidth);
+ && IndexWidth == rhs.IndexWidth
+ && GlobalBitWidth == rhs.GlobalBitWidth);
}
//===----------------------------------------------------------------------===//
@@ -193,7 +195,7 @@
setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign, E.PrefAlign,
E.TypeBitWidth);
}
- setPointerAlignment(0, 8, 8, 8, 8);
+ setPointerAlignment(0, 8, 8, 8, 8, 64);
parseSpecifier(Desc);
}
@@ -304,6 +306,10 @@
// Preferred alignment.
unsigned PointerPrefAlign = PointerABIAlign;
+
+ // Size of global value pointers. This default assumes 8-bit bytes.
+ unsigned GlobalBitWidth = 8 * PointerMemSize;
+
if (!Rest.empty()) {
Split = split(Rest, ':');
PointerPrefAlign = inBytes(getInt(Tok));
@@ -317,10 +323,17 @@
IndexSize = inBytes(getInt(Tok));
if (!IndexSize)
report_fatal_error("Invalid index size of 0 bytes");
+
+ if (!Rest.empty()) {
+ Split = split(Rest, ':');
+ GlobalBitWidth = getInt(Tok);
+ if (!GlobalBitWidth)
+ report_fatal_error("Invalid global bit width of 0 bits");
+ }
}
}
setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign,
- PointerMemSize, IndexSize);
+ PointerMemSize, IndexSize, GlobalBitWidth);
break;
}
case 'i':
@@ -493,7 +506,8 @@
void DataLayout::setPointerAlignment(uint32_t AddrSpace, unsigned ABIAlign,
unsigned PrefAlign, uint32_t TypeByteWidth,
- uint32_t IndexWidth) {
+ uint32_t IndexWidth,
+ uint32_t GlobalBitWidth) {
if (PrefAlign < ABIAlign)
report_fatal_error(
"Preferred alignment cannot be less than the ABI alignment");
@@ -501,12 +515,14 @@
PointersTy::iterator I = findPointerLowerBound(AddrSpace);
if (I == Pointers.end() || I->AddressSpace != AddrSpace) {
Pointers.insert(I, PointerAlignElem::get(AddrSpace, ABIAlign, PrefAlign,
- TypeByteWidth, IndexWidth));
+ TypeByteWidth, IndexWidth,
+ GlobalBitWidth));
} else {
I->ABIAlign = ABIAlign;
I->PrefAlign = PrefAlign;
I->TypeByteWidth = TypeByteWidth;
I->IndexWidth = IndexWidth;
+ I->GlobalBitWidth = GlobalBitWidth;
}
}
@@ -635,6 +651,15 @@
return I->TypeByteWidth;
}
+unsigned DataLayout::getPointerGlobalBitWidth(unsigned AS) const {
+ PointersTy::const_iterator I = findPointerLowerBound(AS);
+ if (I == Pointers.end() || I->AddressSpace != AS) {
+ I = findPointerLowerBound(0);
+ assert(I->AddressSpace == 0);
+ }
+ return I->GlobalBitWidth;
+}
+
unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
assert(Ty->isPtrOrPtrVectorTy() &&
"This should only be called with a pointer or pointer vector type");
Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -271,7 +271,8 @@
// 32-bit private, local, and region pointers. 64-bit global, constant and
// flat.
- return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
+ return "e-p:64:64-p1:64:64-p2:32:32:32:32:16-p3:32:32:32:32:16"
+ "-p4:64:64-p5:32:32-p6:32:32"
"-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
}
Index: test/Transforms/LoadStoreVectorizer/AMDGPU/ds-bounds.ll
===================================================================
--- test/Transforms/LoadStoreVectorizer/AMDGPU/ds-bounds.ll
+++ test/Transforms/LoadStoreVectorizer/AMDGPU/ds-bounds.ll
@@ -18,12 +18,7 @@
; ALL-LABEL: @store_global_const_idx(
-;
-; TODO: Addresses are known-positive, this could be merged!
-; SI: store i32
-; SI: store i32
-;
-; NONSI: store <2 x i32> , <2 x i32> addrspace(3)* %0, align 4
+; ALL: store <2 x i32> , <2 x i32> addrspace(3)* %0, align 4
define amdgpu_cs void @store_global_const_idx() #0 {
entry:
%ptr.a = getelementptr [512 x i32], [512 x i32] addrspace(3)* @compute_lds, i32 0, i32 3
@@ -51,12 +46,7 @@
; ALL-LABEL: @store_global_var_idx_case2(
-;
-; TODO: Addresses are known-positive, this could be merged!
-; SI: store i32
-; SI: store i32
-;
-; NONSI: store <2 x i32> , <2 x i32> addrspace(3)* %0, align 4
+; ALL: store <2 x i32> , <2 x i32> addrspace(3)* %0, align 4
define amdgpu_cs void @store_global_var_idx_case2(i32 %idx) #0 {
entry:
%idx.and = and i32 %idx, 255