diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
--- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h
+++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
@@ -106,6 +106,15 @@
            ArgValue == Other.ArgValue;
   }
   bool operator!=(RetainedKnowledge Other) const { return !(*this == Other); }
+  /// Orders two RetainedKnowledge by ArgValue only; meant for std::min/std::max
+  /// on knowledge whose AttrKind and WasOn match (or where one side is None).
+  bool operator<(RetainedKnowledge Other) const {
+    assert(((AttrKind == Other.AttrKind && WasOn == Other.WasOn) ||
+            AttrKind == Attribute::None || Other.AttrKind == Attribute::None) &&
+           "This is only intend for use in min/max to select the best for "
+           "RetainedKnowledge that is otherwise equal");
+    return ArgValue < Other.ArgValue;
+  }
   operator bool() const { return AttrKind != Attribute::None; }
   static RetainedKnowledge none() { return RetainedKnowledge{}; }
 };
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -460,6 +460,9 @@
   /// This is specialized because it is a common request and does not require
   /// traversing the whole use list.
   Use *getSingleUndroppableUse();
+  const Use *getSingleUndroppableUse() const {
+    return const_cast<Value &>(*this).getSingleUndroppableUse();
+  }
 
   /// Return true if there this value.
   ///
diff --git a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
--- a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
+++ b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEBUILDER_H
 #define LLVM_TRANSFORMS_UTILS_ASSUMEBUNDLEBUILDER_H
 
+#include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/PassManager.h"
@@ -41,6 +42,13 @@
 void salvageKnowledge(Instruction *I, AssumptionCache *AC = nullptr,
                       DominatorTree *DT = nullptr);
 
+/// Build and return a new assume created from the provided knowledge.
+/// Returns nullptr if the knowledge in the assume is fully redundant.
+IntrinsicInst *buildAssumeFromKnowledge(ArrayRef<RetainedKnowledge> Knowledge,
+                                        Instruction *CtxI,
+                                        AssumptionCache *AC = nullptr,
+                                        DominatorTree *DT = nullptr);
+
 /// This pass attempts to minimize the number of assume without loosing any
 /// information.
 struct AssumeSimplifyPass : public PassInfoMixin<AssumeSimplifyPass> {
@@ -55,6 +63,14 @@
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
+/// Canonicalize the RetainedKnowledge RK. It is assumed that RK is part of
+/// Assume. This will return an empty RetainedKnowledge if the knowledge is
+/// useless.
+RetainedKnowledge simplifyRetainedKnowledge(CallBase *Assume,
+                                            RetainedKnowledge RK,
+                                            AssumptionCache *AC,
+                                            DominatorTree *DT);
+
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
@@ -80,6 +81,31 @@
       return isAligned(V, Offset, Alignment, DL);
     }
 
+  if (CtxI) {
+    /// Look through assumes to see if both dereferenceability and alignment can
+    /// be proven by an assume.
+    RetainedKnowledge AlignRK;
+    RetainedKnowledge DerefRK;
+    if (getKnowledgeForValue(
+            V, {Attribute::Dereferenceable, Attribute::Alignment}, nullptr,
+            [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+              if (!isValidAssumeForContext(Assume, CtxI))
+                return false;
+              if (RK.AttrKind == Attribute::Alignment)
+                AlignRK = std::max(AlignRK, RK);
+              if (RK.AttrKind == Attribute::Dereferenceable)
+                DerefRK = std::max(DerefRK, RK);
+              if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
+                  DerefRK.ArgValue >= Size.getZExtValue())
+                return true; // We have found what we needed so we stop looking
+              return false;  // Other assumes may have better information, so
+                             // keep looking.
+            }))
+      return true;
+  }
+  /// TODO refactor this function to be able to search independently for
+  /// Dereferenceability and Alignment requirements.
+
   // For GEPs, determine if the indexing lands within the allocated object.
   if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
     const Value *Base = GEP->getPointerOperand();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -67,6 +67,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 #include <algorithm>
@@ -89,6 +90,10 @@
     cl::desc("How wide an instruction window to bypass looking for "
              "another guard"));
 
+/// enable preservation of attributes in assume like:
+/// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
+extern cl::opt<bool> EnableKnowledgeRetention;
+
 /// Return the specified type promoted as it would be to pass though a va_arg
 /// area.
 static Type *getPromotedType(Type *Ty) {
@@ -1511,15 +1516,23 @@
     Value *IIOperand = II->getArgOperand(0);
     SmallVector<OperandBundleDef, 4> OpBundles;
     II->getOperandBundlesAsDefs(OpBundles);
-    bool HasOpBundles = !OpBundles.empty();
+
+    /// Drops the condition of \p Assume: erases it if isAssumeWithEmptyBundle()
+    /// holds, else sets the condition to true and returns nullptr.
+    auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
+      assert(isa<IntrinsicInst>(Assume));
+      if (isAssumeWithEmptyBundle(*cast<IntrinsicInst>(Assume)))
+        return eraseInstFromFunction(*Assume);
+      replaceUse(Assume->getOperandUse(0),
+                 ConstantInt::getTrue(Assume->getContext()));
+      return nullptr;
+    };
     // Remove an assume if it is followed by an identical assume.
     // TODO: Do we need this? Unless there are conflicting assumptions, the
     // computeKnownBits(IIOperand) below here eliminates redundant assumes.
     Instruction *Next = II->getNextNonDebugInstruction();
-    if (HasOpBundles &&
-        match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))) &&
-        !cast<IntrinsicInst>(Next)->hasOperandBundles())
-      return eraseInstFromFunction(CI);
+    if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
+      return RemoveConditionFromAssume(II);
 
     // Canonicalize assume(a && b) -> assume(a); assume(b);
     // Note: New assumption intrinsics created here are registered by
@@ -1552,13 +1565,95 @@
         isValidAssumeForContext(II, LHS, &DT)) {
       MDNode *MD = MDNode::get(II->getContext(), None);
       LHS->setMetadata(LLVMContext::MD_nonnull, MD);
-      if (!HasOpBundles)
-        return eraseInstFromFunction(*II);
+      return RemoveConditionFromAssume(II);
 
       // TODO: apply nonnull return attributes to calls and invokes
       // TODO: apply range metadata for range check patterns?
     }
 
+    // Convert nonnull assume like:
+    // %A = icmp ne i32* %PTR, null
+    // call void @llvm.assume(i1 %A)
+    // into
+    // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
+    if (EnableKnowledgeRetention &&
+        match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
+        Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
+      if (IntrinsicInst *Replacement = buildAssumeFromKnowledge(
+              {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
+
+        Replacement->insertBefore(Next);
+        AC.registerAssumption(Replacement);
+        return RemoveConditionFromAssume(II);
+      }
+    }
+
+    // Convert alignment assume like:
+    // %B = ptrtoint i32* %A to i64
+    // %C = and i64 %B, Constant
+    // %D = icmp eq i64 %C, 0
+    // call void @llvm.assume(i1 %D)
+    // into
+    // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1) ]
+    uint64_t AlignMask;
+    if (EnableKnowledgeRetention &&
+        match(IIOperand,
+              m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
+                    m_Zero())) &&
+        Pred == CmpInst::ICMP_EQ && isPowerOf2_64(AlignMask + 1)) {
+      // Match the optional constant offset into a fresh variable: a pattern
+      // that fails part-way may already have overwritten its earlier captures.
+      uint64_t Offset = 0;
+      Value *Base = nullptr;
+      if (match(A, m_Add(m_Value(Base), m_ConstantInt(Offset))))
+        A = Base;
+      if (match(A, m_PtrToInt(m_Value(A)))) {
+        /// Note: this doesn't preserve the offset information but merges offset
+        /// and alignment. TODO: we can generate a GEP instead of merging the
+        /// alignment with the offset.
+        RetainedKnowledge RK{Attribute::Alignment,
+                             (unsigned)MinAlign(Offset, AlignMask + 1), A};
+        if (IntrinsicInst *Replacement =
+                buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {
+          Replacement->insertAfter(II);
+          AC.registerAssumption(Replacement);
+        }
+        return RemoveConditionFromAssume(II);
+      }
+    }
+
+    /// Canonicalize Knowledge in operand bundles.
+    if (II->hasOperandBundles()) {
+      for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
+        auto &BOI = II->bundle_op_info_begin()[Idx];
+        RetainedKnowledge RK = llvm::getKnowledgeFromBundle(*II, BOI);
+        if (BOI.End - BOI.Begin > 2)
+          continue; // Prevent reducing knowledge in an align with offset since
+                    // extracting a RetainedKnowledge from it loses the offset
+                    // information.
+        RetainedKnowledge CanonRK = llvm::simplifyRetainedKnowledge(
+            II, RK, &getAssumptionCache(), &getDominatorTree());
+        if (CanonRK == RK)
+          continue;
+        if (!CanonRK) {
+          if (BOI.End - BOI.Begin > 0) {
+            Worklist.pushValue(II->op_begin()[BOI.Begin]);
+            Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
+          }
+          continue;
+        }
+        assert(RK.AttrKind == CanonRK.AttrKind);
+        if (BOI.End - BOI.Begin > 0)
+          II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
+        if (BOI.End - BOI.Begin > 1)
+          II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
+              Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
+        if (RK.WasOn)
+          Worklist.pushValue(RK.WasOn);
+        return II;
+      }
+    }
+
     // If there is a dominating assume with the same condition as this one,
     // then this one is redundant, and should be removed.
     KnownBits Known(1);
diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
--- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -6,8 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "assume-builder"
-
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/MapVector.h"
@@ -37,6 +35,8 @@
     cl::desc(
         "enable preservation of attributes throughout code transformation"));
 
+#define DEBUG_TYPE "assume-builder"
+
 STATISTIC(NumAssumeBuilt, "Number of assume built by the assume builder");
 STATISTIC(NumBundlesInAssumes, "Total number of Bundles in the assume built");
 STATISTIC(NumAssumesMerged,
@@ -65,7 +65,7 @@
 
 /// This function will try to transform the given knowledge into a more
 /// canonical one. the canonical knowledge maybe the given one.
-RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, Module *M) {
+RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, const DataLayout &DL) {
   switch (RK.AttrKind) {
   default:
     return RK;
@@ -76,8 +76,7 @@
     Value *V = RK.WasOn->stripInBoundsOffsets([&](const Value *Strip) {
       if (auto *GEP = dyn_cast<GEPOperator>(Strip))
         RK.ArgValue =
-            MinAlign(RK.ArgValue,
-                     GEP->getMaxPreservedAlignment(M->getDataLayout()).value());
+            MinAlign(RK.ArgValue, GEP->getMaxPreservedAlignment(DL).value());
     });
     RK.WasOn = V;
     return RK;
@@ -85,8 +84,8 @@
   case Attribute::Dereferenceable:
   case Attribute::DereferenceableOrNull: {
     int64_t Offset = 0;
-    Value *V = GetPointerBaseWithConstantOffset(
-        RK.WasOn, Offset, M->getDataLayout(), /*AllowNonInBounds*/ false);
+    Value *V = GetPointerBaseWithConstantOffset(RK.WasOn, Offset, DL,
+                                                /*AllowNonInBounds*/ false);
     if (Offset < 0)
       return RK;
     RK.ArgValue = RK.ArgValue + Offset;
@@ -103,16 +102,16 @@
 
   using MapKey = std::pair<Value *, Attribute::AttrKind>;
   SmallMapVector<MapKey, unsigned, 8> AssumedKnowledgeMap;
-  Instruction *InstBeingRemoved = nullptr;
+  Instruction *InstBeingModified = nullptr;
   AssumptionCache* AC = nullptr;
   DominatorTree* DT = nullptr;
 
   AssumeBuilderState(Module *M, Instruction *I = nullptr,
                      AssumptionCache *AC = nullptr, DominatorTree *DT = nullptr)
-      : M(M), InstBeingRemoved(I), AC(AC), DT(DT) {}
+      : M(M), InstBeingModified(I), AC(AC), DT(DT) {}
 
   bool tryToPreserveWithoutAddingAssume(RetainedKnowledge RK) {
-    if (!InstBeingRemoved || !RK.WasOn)
+    if (!InstBeingModified || !RK.WasOn)
       return false;
     bool HasBeenPreserved = false;
     Use* ToUpdate = nullptr;
@@ -120,13 +119,12 @@
         RK.WasOn, {RK.AttrKind}, AC,
         [&](RetainedKnowledge RKOther, Instruction *Assume,
             const CallInst::BundleOpInfo *Bundle) {
-          if (!isValidAssumeForContext(Assume, InstBeingRemoved, DT))
+          if (!isValidAssumeForContext(Assume, InstBeingModified, DT))
             return false;
           if (RKOther.ArgValue >= RK.ArgValue) {
             HasBeenPreserved = true;
             return true;
-          } else if (isValidAssumeForContext(InstBeingRemoved, Assume,
-                                             DT)) {
+          } else if (isValidAssumeForContext(InstBeingModified, Assume, DT)) {
             HasBeenPreserved = true;
             IntrinsicInst *Intr = cast<IntrinsicInst>(Assume);
             ToUpdate = &Intr->op_begin()[Bundle->Begin + ABA_Argument];
@@ -162,14 +160,14 @@
         if (RK.WasOn->use_empty())
           return false;
         Use *SingleUse = RK.WasOn->getSingleUndroppableUse();
-        if (SingleUse && SingleUse->getUser() == InstBeingRemoved)
+        if (SingleUse && SingleUse->getUser() == InstBeingModified)
           return false;
       }
     return true;
   }
 
   void addKnowledge(RetainedKnowledge RK) {
-    RK = canonicalizedKnowledge(RK, M);
+    RK = canonicalizedKnowledge(RK, M->getDataLayout());
 
     if (!isKnowledgeWorthPreserving(RK))
       return;
@@ -299,6 +297,32 @@
   }
 }
 
+IntrinsicInst *
+llvm::buildAssumeFromKnowledge(ArrayRef<RetainedKnowledge> Knowledge,
+                               Instruction *CtxI, AssumptionCache *AC,
+                               DominatorTree *DT) {
+  AssumeBuilderState State(CtxI->getModule(), CtxI, AC, DT);
+  for (const RetainedKnowledge &Fact : Knowledge)
+    State.addKnowledge(Fact);
+  return State.build();
+}
+
+RetainedKnowledge llvm::simplifyRetainedKnowledge(CallBase *Assume,
+                                                  RetainedKnowledge RK,
+                                                  AssumptionCache *AC,
+                                                  DominatorTree *DT) {
+  assert(Assume->getIntrinsicID() == Intrinsic::assume);
+  Module *M = Assume->getModule();
+  AssumeBuilderState State(M, Assume, AC, DT);
+  RK = canonicalizedKnowledge(RK, M->getDataLayout());
+  // The canonical knowledge is dropped when it is not worth preserving or when
+  // it can be preserved without adding an assume; the second check only runs
+  // if the first one passes.
+  const bool Droppable = !State.isKnowledgeWorthPreserving(RK) ||
+                         State.tryToPreserveWithoutAddingAssume(RK);
+  return Droppable ? RetainedKnowledge::none() : RK;
+}
+
 namespace {
 
 struct AssumeSimplify {
diff --git a/llvm/test/Analysis/BasicAA/featuretest.ll b/llvm/test/Analysis/BasicAA/featuretest.ll
--- a/llvm/test/Analysis/BasicAA/featuretest.ll
+++ b/llvm/test/Analysis/BasicAA/featuretest.ll
@@ -20,7 +20,6 @@
 ; CHECK-NEXT:    [[ARRAY22:%.*]] = alloca [200 x i32], align 4
 ; CHECK-NEXT:    [[ARRAY22_SUB:%.*]] = getelementptr inbounds [200 x i32], [200 x i32]* [[ARRAY22]], i64 0, i64 0
 ; CHECK-NEXT:    [[ARRAY11_SUB:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[ARRAY11]], i64 0, i64 0
-; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAY11_SUB]], i32 4) ]
 ; CHECK-NEXT:    call void @external(i32* nonnull [[ARRAY11_SUB]])
 ; CHECK-NEXT:    call void @external(i32* nonnull [[ARRAY22_SUB]])
 ; CHECK-NEXT:    [[POINTER2:%.*]] = getelementptr [200 x i32], [200 x i32]* [[ARRAY22]], i64 0, i64 [[B:%.*]]
diff --git a/llvm/test/Analysis/ValueTracking/assume-queries-counter.ll b/llvm/test/Analysis/ValueTracking/assume-queries-counter.ll
--- a/llvm/test/Analysis/ValueTracking/assume-queries-counter.ll
+++ b/llvm/test/Analysis/ValueTracking/assume-queries-counter.ll
@@ -1,9 +1,9 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; REQUIRES: asserts
 
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine --debug-counter=assume-queries-counter-skip=0,assume-queries-counter-count=1 -S | FileCheck %s --check-prefixes=COUNTER1
-; RUN: opt < %s -instcombine --debug-counter=assume-queries-counter-skip=1,assume-queries-counter-count=6 -S | FileCheck %s --check-prefixes=COUNTER2
-; RUN: opt < %s -instcombine --debug-counter=assume-queries-counter-skip=6,assume-queries-counter-count=1 -S | FileCheck %s --check-prefixes=COUNTER3
+; RUN: opt < %s -instcombine --debug-counter=assume-queries-counter-skip=0,assume-queries-counter-count=1 -S | FileCheck %s --check-prefixes=SAME,COUNTER1
+; RUN: opt < %s -instcombine --debug-counter=assume-queries-counter-skip=1,assume-queries-counter-count=2 -S | FileCheck %s --check-prefixes=SAME,COUNTER2
+; RUN: opt < %s -instcombine --debug-counter=assume-queries-counter-skip=2,assume-queries-counter-count=5 -S | FileCheck %s --check-prefixes=SAME,COUNTER3
 
 declare i1 @get_val()
 declare void @llvm.assume(i1)
@@ -11,12 +11,12 @@
 define dso_local i1 @test1(i32* readonly %0) {
 ; COUNTER1-LABEL: @test1(
 ; COUNTER1-NEXT:    call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0:%.*]]) ]
-; COUNTER1-NEXT:    ret i1 false
+; COUNTER1-NEXT:    [[TMP2:%.*]] = icmp eq i32* [[TMP0]], null
+; COUNTER1-NEXT:    ret i1 [[TMP2]]
 ;
 ; COUNTER2-LABEL: @test1(
 ; COUNTER2-NEXT:    call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0:%.*]]) ]
-; COUNTER2-NEXT:    [[TMP2:%.*]] = icmp eq i32* [[TMP0]], null
-; COUNTER2-NEXT:    ret i1 [[TMP2]]
+; COUNTER2-NEXT:    ret i1 false
 ;
 ; COUNTER3-LABEL: @test1(
 ; COUNTER3-NEXT:    call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0:%.*]]) ]
@@ -35,13 +35,13 @@
 ; COUNTER1-NEXT:    ret i1 [[TMP2]]
 ;
 ; COUNTER2-LABEL: @test2(
-; COUNTER2-NEXT:    call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0:%.*]]) ]
-; COUNTER2-NEXT:    ret i1 false
+; COUNTER2-NEXT:    [[TMP2:%.*]] = icmp eq i32* [[TMP0:%.*]], null
+; COUNTER2-NEXT:    call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0]]) ]
+; COUNTER2-NEXT:    ret i1 [[TMP2]]
 ;
 ; COUNTER3-LABEL: @test2(
-; COUNTER3-NEXT:    [[TMP2:%.*]] = icmp eq i32* [[TMP0:%.*]], null
-; COUNTER3-NEXT:    call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0]]) ]
-; COUNTER3-NEXT:    ret i1 [[TMP2]]
+; COUNTER3-NEXT:    call void @llvm.assume(i1 true) [ "nonnull"(i32* [[TMP0:%.*]]) ]
+; COUNTER3-NEXT:    ret i1 false
 ;
   %2 = icmp eq i32* %0, null
   call void @llvm.assume(i1 true) ["nonnull"(i32* %0)]
@@ -49,48 +49,20 @@
 }
 
 define dso_local i32 @test4(i32* readonly %0, i1 %cond) {
-; COUNTER1-LABEL: @test4(
-; COUNTER1-NEXT:    call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[TMP0:%.*]], i32 4) ]
-; COUNTER1-NEXT:    br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
-; COUNTER1:       B:
-; COUNTER1-NEXT:    br label [[A]]
-; COUNTER1:       A:
-; COUNTER1-NEXT:    [[TMP2:%.*]] = icmp eq i32* [[TMP0]], null
-; COUNTER1-NEXT:    br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]]
-; COUNTER1:       3:
-; COUNTER1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4
-; COUNTER1-NEXT:    br label [[TMP5]]
-; COUNTER1:       5:
-; COUNTER1-NEXT:    [[TMP6:%.*]] = phi i32 [ [[TMP4]], [[TMP3]] ], [ 0, [[A]] ]
-; COUNTER1-NEXT:    ret i32 [[TMP6]]
-;
-; COUNTER2-LABEL: @test4(
-; COUNTER2-NEXT:    call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[TMP0:%.*]], i32 4) ]
-; COUNTER2-NEXT:    br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
-; COUNTER2:       B:
-; COUNTER2-NEXT:    br label [[A]]
-; COUNTER2:       A:
-; COUNTER2-NEXT:    br i1 false, label [[TMP4:%.*]], label [[TMP2:%.*]]
-; COUNTER2:       2:
-; COUNTER2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
-; COUNTER2-NEXT:    br label [[TMP4]]
-; COUNTER2:       4:
-; COUNTER2-NEXT:    [[TMP5:%.*]] = phi i32 [ [[TMP3]], [[TMP2]] ], [ 0, [[A]] ]
-; COUNTER2-NEXT:    ret i32 [[TMP5]]
-;
-; COUNTER3-LABEL: @test4(
-; COUNTER3-NEXT:    call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[TMP0:%.*]], i32 4) ]
-; COUNTER3-NEXT:    br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
-; COUNTER3:       B:
-; COUNTER3-NEXT:    br label [[A]]
-; COUNTER3:       A:
-; COUNTER3-NEXT:    br i1 false, label [[TMP4:%.*]], label [[TMP2:%.*]]
-; COUNTER3:       2:
-; COUNTER3-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4
-; COUNTER3-NEXT:    br label [[TMP4]]
-; COUNTER3:       4:
-; COUNTER3-NEXT:    [[TMP5:%.*]] = phi i32 [ [[TMP3]], [[TMP2]] ], [ 0, [[A]] ]
-; COUNTER3-NEXT:    ret i32 [[TMP5]]
+; SAME-LABEL: @test4(
+; SAME-NEXT:    call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[TMP0:%.*]], i32 4) ]
+; SAME-NEXT:    br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
+; SAME:       B:
+; SAME-NEXT:    br label [[A]]
+; SAME:       A:
+; SAME-NEXT:    [[TMP2:%.*]] = icmp eq i32* [[TMP0]], null
+; SAME-NEXT:    br i1 [[TMP2]], label [[TMP5:%.*]], label [[TMP3:%.*]]
+; SAME:       3:
+; SAME-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP0]], align 4
+; SAME-NEXT:    br label [[TMP5]]
+; SAME:       5:
+; SAME-NEXT:    [[TMP6:%.*]] = phi i32 [ [[TMP4]], [[TMP3]] ], [ 0, [[A]] ]
+; SAME-NEXT:    ret i32 [[TMP6]]
 ;
   call void @llvm.assume(i1 true) ["dereferenceable"(i32* %0, i32 4)]
   br i1 %cond, label %A, label %B
diff --git a/llvm/test/Analysis/ValueTracking/assume.ll b/llvm/test/Analysis/ValueTracking/assume.ll
--- a/llvm/test/Analysis/ValueTracking/assume.ll
+++ b/llvm/test/Analysis/ValueTracking/assume.ll
@@ -91,6 +91,40 @@
   ret i32 %6
 }
 
+define dso_local i32 @test4a(i32* readonly %0, i1 %cond) {
+; CHECK-LABEL: @test4a(
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[TMP0:%.*]], i32 4), "align"(i32* [[TMP0]], i32 8) ]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK:       B:
+; CHECK-NEXT:    br label [[A]]
+; CHECK:       A:
+; CHECK-NEXT:    br i1 false, label [[TMP4:%.*]], label [[TMP2:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 8
+; CHECK-NEXT:    br label [[TMP4]]
+; CHECK:       4:
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i32 [ [[TMP3]], [[TMP2]] ], [ 0, [[A]] ]
+; CHECK-NEXT:    ret i32 [[TMP5]]
+;
+  call void @llvm.assume(i1 true) ["dereferenceable"(i32* %0, i32 4), "align"(i32* %0, i32 8)]
+  br i1 %cond, label %A, label %B
+
+B:
+  br label %A
+
+A:
+  %2 = icmp eq i32* %0, null
+  br i1 %2, label %5, label %3
+
+3:                                                ; preds = %1
+  %4 = load i32, i32* %0, align 4
+  br label %5
+
+5:                                                ; preds = %1, %3
+  %6 = phi i32 [ %4, %3 ], [ 0, %A ]
+  ret i32 %6
+}
+
 define dso_local i32 @test4b(i32* readonly %0, i1 %cond) null_pointer_is_valid {
 ; CHECK-LABEL: @test4b(
 ; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "dereferenceable"(i32* [[TMP0:%.*]], i32 4) ]
diff --git a/llvm/test/Transforms/InstCombine/assume-align.ll b/llvm/test/Transforms/InstCombine/assume-align.ll
--- a/llvm/test/Transforms/InstCombine/assume-align.ll
+++ b/llvm/test/Transforms/InstCombine/assume-align.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i8* [[PTR]], i64 4) ]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(i8* [[A]], i64 4) ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[PTR]] to i32*
 ; CHECK-NEXT:    store i32 4, i32* [[TMP3]], align 4
 ; CHECK-NEXT:    br label [[IF_END]]
diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll
--- a/llvm/test/Transforms/InstCombine/assume.ll
+++ b/llvm/test/Transforms/InstCombine/assume.ll
@@ -10,13 +10,18 @@
 ; been removed:
 
 define i32 @foo1(i32* %a) #0 {
-; CHECK-LABEL: @foo1(
-; CHECK-NEXT:    [[T0:%.*]] = load i32, i32* [[A:%.*]], align 32
-; CHECK-NEXT:    [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64
-; CHECK-NEXT:    [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
-; CHECK-NEXT:    [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[MASKCOND]])
-; CHECK-NEXT:    ret i32 [[T0]]
+; DEFAULT-LABEL: @foo1(
+; DEFAULT-NEXT:    [[T0:%.*]] = load i32, i32* [[A:%.*]], align 32
+; DEFAULT-NEXT:    [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64
+; DEFAULT-NEXT:    [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
+; DEFAULT-NEXT:    [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
+; DEFAULT-NEXT:    tail call void @llvm.assume(i1 [[MASKCOND]])
+; DEFAULT-NEXT:    ret i32 [[T0]]
+;
+; BUNDLES-LABEL: @foo1(
+; BUNDLES-NEXT:    [[T0:%.*]] = load i32, i32* [[A:%.*]], align 32
+; BUNDLES-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ]
+; BUNDLES-NEXT:    ret i32 [[T0]]
 ;
   %t0 = load i32, i32* %a, align 4
   %ptrint = ptrtoint i32* %a to i64
@@ -29,13 +34,18 @@
 ; Same check as in @foo1, but make sure it works if the assume is first too.
 
 define i32 @foo2(i32* %a) #0 {
-; CHECK-LABEL: @foo2(
-; CHECK-NEXT:    [[PTRINT:%.*]] = ptrtoint i32* [[A:%.*]] to i64
-; CHECK-NEXT:    [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
-; CHECK-NEXT:    [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[MASKCOND]])
-; CHECK-NEXT:    [[T0:%.*]] = load i32, i32* [[A]], align 32
-; CHECK-NEXT:    ret i32 [[T0]]
+; DEFAULT-LABEL: @foo2(
+; DEFAULT-NEXT:    [[PTRINT:%.*]] = ptrtoint i32* [[A:%.*]] to i64
+; DEFAULT-NEXT:    [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
+; DEFAULT-NEXT:    [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
+; DEFAULT-NEXT:    tail call void @llvm.assume(i1 [[MASKCOND]])
+; DEFAULT-NEXT:    [[T0:%.*]] = load i32, i32* [[A]], align 32
+; DEFAULT-NEXT:    ret i32 [[T0]]
+;
+; BUNDLES-LABEL: @foo2(
+; BUNDLES-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[A:%.*]], i64 32) ]
+; BUNDLES-NEXT:    [[T0:%.*]] = load i32, i32* [[A]], align 32
+; BUNDLES-NEXT:    ret i32 [[T0]]
 ;
   %ptrint = ptrtoint i32* %a to i64
   %maskedptr = and i64 %ptrint, 31
@@ -282,17 +292,28 @@
 ; if the assume is control dependent on something else
 
 define i1 @nonnull3(i32** %a, i1 %control) {
-; CHECK-LABEL: @nonnull3(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
-; CHECK-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
-; CHECK:       taken:
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    ret i1 false
-; CHECK:       not_taken:
-; CHECK-NEXT:    [[RVAL_2:%.*]] = icmp sgt i32* [[LOAD]], null
-; CHECK-NEXT:    ret i1 [[RVAL_2]]
+; FIXME: in the BUNDLES version we could duplicate the load and keep the assume nonnull.
+; DEFAULT-LABEL: @nonnull3(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; DEFAULT-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; DEFAULT-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
+; DEFAULT:       taken:
+; DEFAULT-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; DEFAULT-NEXT:    ret i1 false
+; DEFAULT:       not_taken:
+; DEFAULT-NEXT:    [[RVAL_2:%.*]] = icmp sgt i32* [[LOAD]], null
+; DEFAULT-NEXT:    ret i1 [[RVAL_2]]
+;
+; BUNDLES-LABEL: @nonnull3(
+; BUNDLES-NEXT:  entry:
+; BUNDLES-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
+; BUNDLES:       taken:
+; BUNDLES-NEXT:    ret i1 false
+; BUNDLES:       not_taken:
+; BUNDLES-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; BUNDLES-NEXT:    [[RVAL_2:%.*]] = icmp sgt i32* [[LOAD]], null
+; BUNDLES-NEXT:    ret i1 [[RVAL_2]]
 ;
 entry:
   %load = load i32*, i32** %a
@@ -312,12 +333,18 @@
 ; interrupted by an exception being thrown
 
 define i1 @nonnull4(i32** %a) {
-; CHECK-LABEL: @nonnull4(
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
-; CHECK-NEXT:    tail call void @escape(i32* [[LOAD]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    ret i1 false
+; DEFAULT-LABEL: @nonnull4(
+; DEFAULT-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; DEFAULT-NEXT:    tail call void @escape(i32* [[LOAD]])
+; DEFAULT-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; DEFAULT-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; DEFAULT-NEXT:    ret i1 false
+;
+; BUNDLES-LABEL: @nonnull4(
+; BUNDLES-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; BUNDLES-NEXT:    tail call void @escape(i32* [[LOAD]])
+; BUNDLES-NEXT:    call void @llvm.assume(i1 true) [ "nonnull"(i32* [[LOAD]]) ]
+; BUNDLES-NEXT:    ret i1 false
 ;
   %load = load i32*, i32** %a
   ;; This call may throw!
@@ -371,15 +398,14 @@
 ; get in the way of the fold.
 
 define void @debug_interference(i8 %x) {
-; CHECK-LABEL: @debug_interference(
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i8 [[X:%.*]], 0
-; CHECK-NEXT:    tail call void @llvm.assume(i1 false)
-; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i32 5, [[META7:metadata !.*]], metadata !DIExpression()), [[DBG9:!dbg !.*]]
-; CHECK-NEXT:    tail call void @llvm.assume(i1 false)
-; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]]
-; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]]
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP2]])
-; CHECK-NEXT:    ret void
+; SAME-LABEL: @debug_interference(
+; SAME-NEXT:    [[CMP2:%.*]] = icmp ne i8 [[X:%.*]], 0
+; SAME-NEXT:    tail call void @llvm.dbg.value(metadata i32 5, [[META7:metadata !.*]], metadata !DIExpression()), [[DBG9:!dbg !.*]]
+; SAME-NEXT:    tail call void @llvm.assume(i1 false)
+; SAME-NEXT:    tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]]
+; SAME-NEXT:    tail call void @llvm.dbg.value(metadata i32 5, [[META7]], metadata !DIExpression()), [[DBG9]]
+; SAME-NEXT:    tail call void @llvm.assume(i1 [[CMP2]])
+; SAME-NEXT:    ret void
 ;
   %cmp1 = icmp eq i8 %x, 0
   %cmp2 = icmp ne i8 %x, 0
@@ -412,17 +438,27 @@
 }
 
 define i1 @nonnull3A(i32** %a, i1 %control) {
-; CHECK-LABEL: @nonnull3A(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
-; CHECK-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
-; CHECK:       taken:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
-; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
-; CHECK-NEXT:    ret i1 true
-; CHECK:       not_taken:
-; CHECK-NEXT:    [[RVAL_2:%.*]] = icmp sgt i32* [[LOAD]], null
-; CHECK-NEXT:    ret i1 [[RVAL_2]]
+; DEFAULT-LABEL: @nonnull3A(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; DEFAULT-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
+; DEFAULT:       taken:
+; DEFAULT-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; DEFAULT-NEXT:    call void @llvm.assume(i1 [[CMP]])
+; DEFAULT-NEXT:    ret i1 true
+; DEFAULT:       not_taken:
+; DEFAULT-NEXT:    [[RVAL_2:%.*]] = icmp sgt i32* [[LOAD]], null
+; DEFAULT-NEXT:    ret i1 [[RVAL_2]]
+;
+; BUNDLES-LABEL: @nonnull3A(
+; BUNDLES-NEXT:  entry:
+; BUNDLES-NEXT:    br i1 [[CONTROL:%.*]], label [[TAKEN:%.*]], label [[NOT_TAKEN:%.*]]
+; BUNDLES:       taken:
+; BUNDLES-NEXT:    ret i1 true
+; BUNDLES:       not_taken:
+; BUNDLES-NEXT:    [[LOAD:%.*]] = load i32*, i32** [[A:%.*]], align 8
+; BUNDLES-NEXT:    [[RVAL_2:%.*]] = icmp sgt i32* [[LOAD]], null
+; BUNDLES-NEXT:    ret i1 [[RVAL_2]]
 ;
 entry:
   %load = load i32*, i32** %a
@@ -739,6 +775,17 @@
   unreachable
 }
 
+define void @canonicalize_assume(i32* %0) {
+; SAME-LABEL: @canonicalize_assume(
+; SAME-NEXT:    call void @llvm.assume(i1 true) [ "align"(i32* [[TMP0:%.*]], i64 8) ]
+; SAME-NEXT:    ret void
+;
+  %2 = getelementptr inbounds i32, i32* %0, i64 2
+  %3 = bitcast i32* %2 to i8*
+  call void @llvm.assume(i1 true) [ "align"(i8* %3, i64 16) ]
+  ret void
+}
+
 declare void @llvm.dbg.value(metadata, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}