Index: clang/test/CodeGen/ms-intrinsics.c
===================================================================
--- clang/test/CodeGen/ms-intrinsics.c
+++ clang/test/CodeGen/ms-intrinsics.c
@@ -499,13 +499,13 @@
 int test_iso_volatile_load32(int volatile *p) { return __iso_volatile_load32(p); }
 __int64 test_iso_volatile_load64(__int64 volatile *p) { return __iso_volatile_load64(p); }
 
-// CHECK: define{{.*}}i8 @test_iso_volatile_load8(i8*{{[a-z_ ]*}}%p)
+// CHECK: define{{.*}}i8 @test_iso_volatile_load8(i8*{{[a-z0-9_() ]*}}%p)
 // CHECK: = load volatile i8, i8* %p
-// CHECK: define{{.*}}i16 @test_iso_volatile_load16(i16*{{[a-z_ ]*}}%p)
+// CHECK: define{{.*}}i16 @test_iso_volatile_load16(i16*{{[a-z0-9_() ]*}}%p)
 // CHECK: = load volatile i16, i16* %p
-// CHECK: define{{.*}}i32 @test_iso_volatile_load32(i32*{{[a-z_ ]*}}%p)
+// CHECK: define{{.*}}i32 @test_iso_volatile_load32(i32*{{[a-z0-9_() ]*}}%p)
 // CHECK: = load volatile i32, i32* %p
-// CHECK: define{{.*}}i64 @test_iso_volatile_load64(i64*{{[a-z_ ]*}}%p)
+// CHECK: define{{.*}}i64 @test_iso_volatile_load64(i64*{{[a-z0-9_() ]*}}%p)
 // CHECK: = load volatile i64, i64* %p
 
 void test_iso_volatile_store8(char volatile *p, char v) { __iso_volatile_store8(p, v); }
Index: clang/test/CodeGen/ms-x86-intrinsics.c
===================================================================
--- clang/test/CodeGen/ms-x86-intrinsics.c
+++ clang/test/CodeGen/ms-x86-intrinsics.c
@@ -120,7 +120,7 @@
                     __int64 *HighProduct) {
   return _mul128(Multiplier, Multiplicand, HighProduct);
 }
-// CHECK-X64-LABEL: define dso_local i64 @test_mul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z_ ]*}}%HighProduct)
+// CHECK-X64-LABEL: define dso_local i64 @test_mul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z0-9()_ ]*}}%HighProduct)
 // CHECK-X64: = sext i64 %Multiplier to i128
 // CHECK-X64: = sext i64 %Multiplicand to i128
 // CHECK-X64: = mul nsw i128 %
@@ -132,7 +132,7 @@
                              unsigned __int64 *HighProduct) {
   return _umul128(Multiplier, Multiplicand, HighProduct);
 }
-// CHECK-X64-LABEL: define dso_local i64 @test_umul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z_ ]*}}%HighProduct)
+// CHECK-X64-LABEL: define dso_local i64 @test_umul128(i64 %Multiplier, i64 %Multiplicand, i64*{{[a-z0-9()_ ]*}}%HighProduct)
 // CHECK-X64: = zext i64 %Multiplier to i128
 // CHECK-X64: = zext i64 %Multiplicand to i128
 // CHECK-X64: = mul nuw i128 %
Index: clang/test/CodeGen/systemz-inline-asm.c
===================================================================
--- clang/test/CodeGen/systemz-inline-asm.c
+++ clang/test/CodeGen/systemz-inline-asm.c
@@ -123,7 +123,7 @@
 long double test_f128(long double f, long double g) {
   asm("axbr %0, %2" : "=f" (f) : "0" (f), "f" (g));
   return f;
-// CHECK: define void @test_f128(fp128* noalias nocapture sret [[DEST:%.*]], fp128* nocapture readonly, fp128* nocapture readonly)
+// CHECK: define void @test_f128(fp128* noalias nocapture sret dereferenceable(16) [[DEST:%.*]], fp128* nocapture readonly dereferenceable(16), fp128* nocapture readonly dereferenceable(16))
 // CHECK: %f = load fp128, fp128* %0
 // CHECK: %g = load fp128, fp128* %1
 // CHECK: [[RESULT:%.*]] = tail call fp128 asm "axbr $0, $2", "=f,0,f"(fp128 %f, fp128 %g)
Index: clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
===================================================================
--- clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -28,7 +28,7 @@
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_single
 // CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
-// CHECK: i32* nocapture %output
+// CHECK: i32* nocapture dereferenceable(4) %output
   output[0] = input.a;
 }
 
@@ -36,7 +36,7 @@
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_pair
 // CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
-// CHECK: i32* nocapture %output
+// CHECK: i32* nocapture dereferenceable(8) %output
   output[0] = (int)input.a;
   output[1] = (int)input.b;
 }
@@ -45,7 +45,7 @@
 // CHECK: spir_kernel
 // AMDGCN: define amdgpu_kernel void @test_kernel
 // CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
-// CHECK: i32* nocapture %output
+// CHECK: i32* nocapture dereferenceable(32) %output
   output[0] = input.elementA;
   output[1] = input.elementB;
   output[2] = (int)input.elementC;
@@ -59,7 +59,7 @@
 void test_function(int_pair input, global int* output) {
 // CHECK-NOT: spir_kernel
 // AMDGCN-NOT: define amdgpu_kernel void @test_function
-// CHECK: i64 %input.coerce0, i64 %input.coerce1, i32* nocapture %output
+// CHECK: i64 %input.coerce0, i64 %input.coerce1, i32* nocapture dereferenceable(8) %output
   output[0] = (int)input.a;
   output[1] = (int)input.b;
 }
Index: llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
===================================================================
--- llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
@@ -16,32 +17,159 @@
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 using namespace llvm;
 
+// TODO: Could use an LLVM set container, but we need sorted iteration here.
+using SetOfOffsets = std::set<int64_t>;
+using ArgToOffsetsMap = SmallDenseMap<Argument *, SetOfOffsets>;
+
 #define DEBUG_TYPE "inferattrs"
 
-static bool inferAllPrototypeAttributes(Module &M,
-                                        const TargetLibraryInfo &TLI) {
+// FIXME: This entire pass should be deprecated by making the "Attributor" pass
+// handle these kinds of inferences.
+
+static void getArgToOffsetsMap(Function &F, ArgToOffsetsMap &ArgOffsetMap) {
+  // To apply a dereferenceable attribute to an argument based on a memory
+  // access in the function, the access must be guaranteed to execute every
+  // time the function is called.
+  // Conservatively, only check for memory ops in the entry block that are
+  // guaranteed to execute.
+  // TODO: This could be enhanced by testing if a memory access post-dominates
+  // the entry block (walking to/from the load). We can also check if a
+  // block is guaranteed to transfer execution to another block.
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  BasicBlock &Entry = F.getEntryBlock();
+  for (Instruction &I : Entry) {
+    // Analyze pointer operands of any load/store instruction.
+    // TODO: Allow cmpxchg and atomicrmw opcodes.
+    // TODO: "isSimple()" excludes atomic ops, but some subset of those should
+    // be allowed.
+    Value *PtrOp = nullptr;
+    switch (I.getOpcode()) {
+    case Instruction::Load: {
+      auto *Load = cast<LoadInst>(&I);
+      if (Load->isSimple())
+        PtrOp = Load->getPointerOperand();
+      break;
+    }
+    case Instruction::Store: {
+      auto *Store = cast<StoreInst>(&I);
+      if (Store->isSimple())
+        PtrOp = Store->getPointerOperand();
+      break;
+    }
+    default:
+      break;
+    }
+    if (!PtrOp) {
+      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+        return;
+      continue;
+    }
+    assert(isGuaranteedToTransferExecutionToSuccessor(&I) &&
+           "Expected simple memory access to transfer execution");
+
+    // Decompose the pointer into base (which must be a function argument) and
+    // offset. Ignore negative offsets because the dereferenceable range must
+    // begin at the argument.
+    int64_t ByteOffset;
+    Value *Base = GetPointerBaseWithConstantOffset(PtrOp, ByteOffset, DL);
+    auto *Arg = dyn_cast<Argument>(Base);
+    if (!Arg || ByteOffset < 0)
+      continue;
+
+    // Make sure we have a pointer to a type that is a multiple of 8-bit bytes
+    // because the 'dereferenceable' attribute range is specified using bytes.
+    // TODO: We can handle weird bitwidths by rounding down.
+    assert(Arg->getType()->isPointerTy() && "Unexpected non-pointer type");
+    Type *ArgEltType = cast<PointerType>(Arg->getType())->getElementType();
+    unsigned ArgSizeInBits = ArgEltType->getPrimitiveSizeInBits();
+    if (!ArgSizeInBits || ArgSizeInBits % 8 != 0)
+      continue;
+
+    // TODO: This restriction can be removed, but that will make the range
+    // calculation more complicated. Instead of only tracking whole number
+    // offsets from the base, we would have to track individual offsets and
+    // ranges (fractional and multiple offsets are possible via casts).
+    assert(isa<PointerType>(PtrOp->getType()) && "Expected pointer type");
+    Type *AccessType = cast<PointerType>(PtrOp->getType())->getElementType();
+    unsigned AccessSizeInBits = AccessType->getPrimitiveSizeInBits();
+    if (AccessSizeInBits != ArgSizeInBits)
+      continue;
+
+    assert((ByteOffset % (AccessSizeInBits / 8)) == 0 &&
+           "Unexpected address offset calculation");
+    SetOfOffsets &OffsetsForArg = ArgOffsetMap[Arg];
+    OffsetsForArg.insert(ByteOffset / (AccessSizeInBits / 8));
+  }
+}
+
+static bool inferDereferenceableFromMemoryAccesses(Function &F) {
+  ArgToOffsetsMap ArgOffsetMap;
+  getArgToOffsetsMap(F, ArgOffsetMap);
   bool Changed = false;
-  for (Function &F : M.functions())
-    // We only infer things using the prototype and the name; we don't need
-    // definitions.
-    if (F.isDeclaration() && !F.hasOptNone())
-      Changed |= inferLibFuncAttributes(F, TLI);
 
+  // For any pointer argument that we matched with memory accesses...
+  for (auto &ArgAndOffsetPair : ArgOffsetMap) {
+    Argument *Arg = ArgAndOffsetPair.getFirst();
+    SetOfOffsets &Offsets = ArgAndOffsetPair.getSecond();
+
+    // Determine how many consecutive memory accesses we found. The set is
+    // sorted, so as soon as we miss an offset from the pointer, we are done.
+    // We do not know if a chunk of memory is dereferenceable without an
+    // access.
+    // TODO: See the size limitation in getArgToOffsetsMap(). If we allow
+    // varying sizes of accesses from an argument, this will not be valid.
+    int64_t MaxOffset = 0;
+    for (int64_t Offset : Offsets) {
+      if (Offset != MaxOffset)
+        break;
+      ++MaxOffset;
+    }
+    // If there was no access directly from this pointer argument, give up.
+    // TODO: We could extend an existing known dereferenceable argument with
+    // extra bytes even if there are missing leading chunks.
+    if (!MaxOffset)
+      continue;
+
+    auto *PtrTy = cast<PointerType>(Arg->getType());
+    unsigned EltSize = PtrTy->getElementType()->getPrimitiveSizeInBits();
+    uint64_t DerefBytes = MaxOffset * (EltSize / 8);
+
+    // Replace existing dereferenceable attributes if we determined that more
+    // bytes are always accessed.
+    unsigned ArgNumber = Arg->getArgNo();
+    if (F.getParamDereferenceableBytes(ArgNumber) < DerefBytes) {
+      F.removeParamAttr(ArgNumber, Attribute::Dereferenceable);
+      F.removeParamAttr(ArgNumber, Attribute::DereferenceableOrNull);
+      F.addDereferenceableParamAttr(ArgNumber, DerefBytes);
+      Changed = true;
+    }
+  }
   return Changed;
 }
 
+static bool inferAttributes(Module &M, const TargetLibraryInfo &TLI) {
+  bool Changed = false;
+
+  for (Function &F : M.functions()) {
+    if (F.hasOptNone())
+      continue;
+    // For libfunc attributes, we infer things using the prototype and the
+    // name. For other attributes, we need to look at the function definition.
+    if (F.isDeclaration())
+      Changed |= inferLibFuncAttributes(F, TLI);
+    else
+      Changed |= inferDereferenceableFromMemoryAccesses(F);
+  }
+  return Changed;
+}
+
 PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
                                               ModuleAnalysisManager &AM) {
+  // If we may have changed fundamental function attributes, clear analyses.
+  // If we didn't infer anything, preserve all analyses.
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
-
-  if (!inferAllPrototypeAttributes(M, TLI))
-    // If we didn't infer anything, preserve all analyses.
-    return PreservedAnalyses::all();
-
-  // Otherwise, we may have changed fundamental function attributes, so clear
-  // out all the passes.
-  return PreservedAnalyses::none();
+  return inferAttributes(M, TLI) ? PreservedAnalyses::none()
+                                 : PreservedAnalyses::all();
 }
 
 namespace {
@@ -61,7 +189,7 @@
       return false;
 
     auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-    return inferAllPrototypeAttributes(M, TLI);
+    return inferAttributes(M, TLI);
   }
 };
 }
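To illustrate the inference added above (a sketch only: the function name and values below are hypothetical, and the "after" signature is hand-written to match the expected behavior exercised by the dereferenceable.ll tests further down, not captured opt output): consecutive, guaranteed-to-execute entry-block accesses at offsets 0..N from a pointer argument let the pass mark that argument dereferenceable for the bytes covered.

; Three i8 loads cover bytes [0,3) of %p1; two i32 loads cover bytes [0,8) of %p2.
define void @deref_sketch(i8* %p1, i32* %p2) {
  %p1.1 = getelementptr i8, i8* %p1, i64 1
  %p1.2 = getelementptr i8, i8* %p1, i64 2
  %p2.1 = getelementptr i32, i32* %p2, i64 1
  %t0 = load i8, i8* %p1
  %t1 = load i8, i8* %p1.1
  %t2 = load i8, i8* %p1.2
  %t3 = load i32, i32* %p2
  %t4 = load i32, i32* %p2.1
  ret void
}
; Expected after -inferattrs (mirrors the @ordering test below):
; define void @deref_sketch(i8* dereferenceable(3) %p1, i32* dereferenceable(8) %p2)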
Index: llvm/test/CodeGen/AMDGPU/inline-attr.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/inline-attr.ll
+++ llvm/test/CodeGen/AMDGPU/inline-attr.ll
@@ -3,7 +3,7 @@
 ; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s
 
 ; GCN: define float @foo(float %x) local_unnamed_addr #0 {
-; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
+; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture dereferenceable(4) %p) local_unnamed_addr #1 {
 ; GCN: %mul.i = fmul float %load, 1.500000e+01
 
 ; UNSAFE: attributes #0 = { norecurse nounwind readnone "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
Index: llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll
===================================================================
--- llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll
+++ llvm/test/Transforms/InferFunctionAttrs/dereferenceable.ll
@@ -1,10 +1,11 @@
 ; RUN: opt < %s -inferattrs -S | FileCheck %s
+; RUN: opt < %s -passes=inferattrs -S | FileCheck %s
 
 ; Determine dereference-ability before unused loads get deleted:
 ; https://bugs.llvm.org/show_bug.cgi?id=21780
 
 define <4 x double> @PR21780(double* %ptr) {
-; CHECK-LABEL: @PR21780(double* %ptr)
+; CHECK-LABEL: @PR21780(double* dereferenceable(32) %ptr)
 ; GEP of index 0 is simplified away.
   %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 1
   %arrayidx2 = getelementptr inbounds double, double* %ptr, i64 2
@@ -23,10 +24,10 @@
   ret <4 x double> %shuffle
 }
 
-; Unsimplified, but still valid. Also, throw in some bogus arguments.
+; Unsimplified, but still valid. Also, throw in a bogus argument and a store argument.
 
 define void @gep0(i8* %unused, i8* %other, i8* %ptr) {
-; CHECK-LABEL: @gep0(i8* %unused, i8* %other, i8* %ptr)
+; CHECK-LABEL: @gep0(i8* %unused, i8* dereferenceable(1) %other, i8* dereferenceable(3) %ptr)
   %arrayidx0 = getelementptr i8, i8* %ptr, i64 0
   %arrayidx1 = getelementptr i8, i8* %ptr, i64 1
   %arrayidx2 = getelementptr i8, i8* %ptr, i64 2
@@ -41,7 +42,7 @@
 ; Multiple arguments may be dereferenceable.
 
 define void @ordering(i8* %ptr1, i32* %ptr2) {
-; CHECK-LABEL: @ordering(i8* %ptr1, i32* %ptr2)
+; CHECK-LABEL: @ordering(i8* dereferenceable(3) %ptr1, i32* dereferenceable(8) %ptr2)
   %a20 = getelementptr i32, i32* %ptr2, i64 0
   %a12 = getelementptr i8, i8* %ptr1, i64 2
   %t12 = load i8, i8* %a12
@@ -71,7 +72,7 @@
   ret void
 }
 
-; Not in entry block and not guaranteed to execute.
+; Negative test - not in entry block and not guaranteed to execute.
 
 define void @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond) {
 ; CHECK-LABEL: @not_entry_not_guaranteed_to_execute(i8* %ptr, i1 %cond)
@@ -92,7 +93,7 @@
 ; The last load may not execute, so the dereferenceable bytes only cover the first two loads.
 
 define void @partial_in_entry(i16* %ptr, i1 %cond) {
-; CHECK-LABEL: @partial_in_entry(i16* %ptr, i1 %cond)
+; CHECK-LABEL: @partial_in_entry(i16* dereferenceable(4) %ptr, i1 %cond)
 entry:
   %arrayidx0 = getelementptr i16, i16* %ptr, i64 0
   %arrayidx1 = getelementptr i16, i16* %ptr, i64 1
@@ -121,10 +122,23 @@
   ret void
 }
 
+; TODO: We should allow inference for atomic (but not volatile) ops.
+
+define void @atomic_is_alright(i16* %ptr) {
+; CHECK-LABEL: @atomic_is_alright(i16* %ptr)
+  %arrayidx0 = getelementptr i16, i16* %ptr, i64 0
+  %arrayidx1 = getelementptr i16, i16* %ptr, i64 1
+  %arrayidx2 = getelementptr i16, i16* %ptr, i64 2
+  %t0 = load atomic i16, i16* %arrayidx0 unordered, align 2
+  %t1 = load i16, i16* %arrayidx1
+  %t2 = load i16, i16* %arrayidx2
+  ret void
+}
+
 declare void @may_not_return()
 
 define void @not_guaranteed_to_transfer_execution(i16* %ptr) {
-; CHECK-LABEL: @not_guaranteed_to_transfer_execution(i16* %ptr)
+; CHECK-LABEL: @not_guaranteed_to_transfer_execution(i16* dereferenceable(2) %ptr)
   %arrayidx0 = getelementptr i16, i16* %ptr, i64 0
   %arrayidx1 = getelementptr i16, i16* %ptr, i64 1
   %arrayidx2 = getelementptr i16, i16* %ptr, i64 2
@@ -138,7 +152,7 @@
 ; We must have consecutive accesses.
 
 define void @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index) {
-; CHECK-LABEL: @variable_gep_index(i8* %unused, i8* %ptr, i64 %variable_index)
+; CHECK-LABEL: @variable_gep_index(i8* %unused, i8* dereferenceable(1) %ptr, i64 %variable_index)
   %arrayidx1 = getelementptr i8, i8* %ptr, i64 %variable_index
   %arrayidx2 = getelementptr i8, i8* %ptr, i64 2
   %t0 = load i8, i8* %ptr
   %t1 = load i8, i8* %arrayidx1
   %t2 = load i8, i8* %arrayidx2
   ret void
 }
 
-; Deal with >1 GEP index.
+; TODO: Deal with >1 GEP index.
 
 define void @multi_index_gep(<4 x i8>* %ptr) {
 ; CHECK-LABEL: @multi_index_gep(<4 x i8>* %ptr)
@@ -156,7 +170,7 @@
   ret void
 }
 
-; Could round weird bitwidths down?
+; TODO: Could round weird bitwidths down?
 
 define void @not_byte_multiple(i9* %ptr) {
 ; CHECK-LABEL: @not_byte_multiple(i9* %ptr)
@@ -165,7 +179,7 @@
   ret void
 }
 
-; Missing direct access from the pointer.
+; Negative test - missing direct access from the pointer.
 
 define void @no_pointer_deref(i16* %ptr) {
 ; CHECK-LABEL: @no_pointer_deref(i16* %ptr)
@@ -179,7 +193,7 @@
 ; Out-of-order accesses are ok, but a missing access ends the dereferenceable range.
 
 define void @non_consecutive(i32* %ptr) {
-; CHECK-LABEL: @non_consecutive(i32* %ptr)
+; CHECK-LABEL: @non_consecutive(i32* dereferenceable(8) %ptr)
   %arrayidx1 = getelementptr i32, i32* %ptr, i64 1
   %arrayidx0 = getelementptr i32, i32* %ptr, i64 0
   %arrayidx3 = getelementptr i32, i32* %ptr, i64 3
@@ -192,7 +206,22 @@
 ; Improve on existing dereferenceable attribute.
 
 define void @more_bytes(i32* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @more_bytes(i32* dereferenceable(8) %ptr)
+; CHECK-LABEL: @more_bytes(i32* dereferenceable(16) %ptr)
+  %arrayidx3 = getelementptr i32, i32* %ptr, i64 3
+  %arrayidx1 = getelementptr i32, i32* %ptr, i64 1
+  %arrayidx0 = getelementptr i32, i32* %ptr, i64 0
+  %arrayidx2 = getelementptr i32, i32* %ptr, i64 2
+  %t3 = load i32, i32* %arrayidx3
+  %t1 = load i32, i32* %arrayidx1
+  %t2 = load i32, i32* %arrayidx2
+  %t0 = load i32, i32* %arrayidx0
+  ret void
+}
+
+; Improve on existing dereferenceable_or_null attribute.
+
+define void @more_bytes_and_not_null(i32* dereferenceable_or_null(8) %ptr) {
+; CHECK-LABEL: @more_bytes_and_not_null(i32* dereferenceable(16) %ptr)
   %arrayidx3 = getelementptr i32, i32* %ptr, i64 3
   %arrayidx1 = getelementptr i32, i32* %ptr, i64 1
   %arrayidx0 = getelementptr i32, i32* %ptr, i64 0
   %arrayidx2 = getelementptr i32, i32* %ptr, i64 2
   %t3 = load i32, i32* %arrayidx3
   %t1 = load i32, i32* %arrayidx1
   %t2 = load i32, i32* %arrayidx2
   %t0 = load i32, i32* %arrayidx0
   ret void
 }
 
-; But don't pessimize existing dereferenceable attribute.
+; Negative test - don't pessimize an existing dereferenceable attribute.
 
 define void @better_bytes(i32* dereferenceable(100) %ptr) {
 ; CHECK-LABEL: @better_bytes(i32* dereferenceable(100) %ptr)
@@ -219,8 +248,10 @@
   ret void
 }
 
+; Peeking through a same-size-element bitcast is supported.
+
 define void @bitcast(i32* %arg) {
-; CHECK-LABEL: @bitcast(i32* %arg)
+; CHECK-LABEL: @bitcast(i32* dereferenceable(8) %arg)
   %ptr = bitcast i32* %arg to float*
   %arrayidx0 = getelementptr float, float* %ptr, i64 0
   %arrayidx1 = getelementptr float, float* %ptr, i64 1
@@ -229,6 +260,8 @@
   ret void
 }
 
+; TODO: Enhance to allow arbitrary sub-ranges.
+
 define void @bitcast_different_sizes(double* %arg1, i8* %arg2) {
 ; CHECK-LABEL: @bitcast_different_sizes(double* %arg1, i8* %arg2)
   %ptr1 = bitcast double* %arg1 to float*
@@ -247,8 +280,10 @@
   ret void
 }
 
+; The attribute has a length, not a range, so we can't represent this any better.
+
 define void @negative_offset(i32* %arg) {
-; CHECK-LABEL: @negative_offset(i32* %arg)
+; CHECK-LABEL: @negative_offset(i32* dereferenceable(4) %arg)
   %ptr = bitcast i32* %arg to float*
   %arrayidx0 = getelementptr float, float* %ptr, i64 0
   %arrayidx1 = getelementptr float, float* %ptr, i64 -1
@@ -257,8 +292,10 @@
   ret void
 }
 
+; Simple store accesses allow inference too.
+
 define void @stores(i32* %arg) {
-; CHECK-LABEL: @stores(i32* %arg)
+; CHECK-LABEL: @stores(i32* dereferenceable(8) %arg)
   %ptr = bitcast i32* %arg to float*
   %arrayidx0 = getelementptr float, float* %ptr, i64 0
   %arrayidx1 = getelementptr float, float* %ptr, i64 1
@@ -267,8 +304,10 @@
   ret void
 }
 
+; Loads and stores can be combined.
+
 define void @load_store(i32* %arg) {
-; CHECK-LABEL: @load_store(i32* %arg)
+; CHECK-LABEL: @load_store(i32* dereferenceable(8) %arg)
   %ptr = bitcast i32* %arg to float*
   %arrayidx0 = getelementptr float, float* %ptr, i64 0
   %arrayidx1 = getelementptr float, float* %ptr, i64 1